Skip to content

Commit 3fe64de

Browse files
committed
Initial POC
0 parents  commit 3fe64de

File tree

9 files changed

+460
-0
lines changed

9 files changed

+460
-0
lines changed

.github/workflows/release.yml

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
name: release
2+
on:
3+
push:
4+
tags:
5+
- "[0-9]+.[0-9]+.[0-9]+"
6+
jobs:
7+
create-release:
8+
name: create-release
9+
runs-on: ubuntu-latest
10+
outputs:
11+
upload_url: ${{ steps.release.outputs.upload_url }}
12+
fl_version: ${{ env.FL_VERSION }}
13+
steps:
14+
- name: Get the release version from the tag
15+
shell: bash
16+
if: env.FL_VERSION == ''
17+
run: |
18+
echo "FL_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
19+
echo "version is: ${{ env.FL_VERSION }}"
20+
- name: Create GitHub release
21+
id: release
22+
uses: actions/create-release@v1
23+
env:
24+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
25+
with:
26+
tag_name: ${{ env.FL_VERSION }}
27+
release_name: ${{ env.FL_VERSION }}
28+
29+
build-release:
30+
name: build-release
31+
needs: ['create-release']
32+
runs-on: ${{ matrix.os }}
33+
env:
34+
CARGO: cargo
35+
TARGET_FLAGS: ""
36+
TARGET_DIR: ./target
37+
RUST_BACKTRACE: 1
38+
strategy:
39+
matrix:
40+
build: [linux, linux-arm, macos]
41+
include:
42+
- build: linux
43+
os: ubuntu-latest
44+
rust: stable
45+
target: x86_64-unknown-linux-musl
46+
- build: linux-arm
47+
os: ubuntu-latest
48+
rust: stable
49+
target: arm-unknown-linux-gnueabihf
50+
- build: macos
51+
os: macos-latest
52+
rust: stable
53+
target: x86_64-apple-darwin
54+
55+
steps:
56+
- name: Checkout repository
57+
uses: actions/checkout@v2
58+
with:
59+
fetch-depth: 1
60+
61+
- name: Install Rust
62+
uses: dtolnay/rust-toolchain@v1
63+
with:
64+
toolchain: ${{ matrix.rust }}
65+
target: ${{ matrix.target }}
66+
67+
- name: Use Cross
68+
shell: bash
69+
run: |
70+
cargo install cross
71+
echo "CARGO=cross" >> $GITHUB_ENV
72+
echo "TARGET_FLAGS=--target ${{ matrix.target }}" >> $GITHUB_ENV
73+
echo "TARGET_DIR=./target/${{ matrix.target }}" >> $GITHUB_ENV
74+
75+
- name: Show command used for Cargo
76+
run: |
77+
echo "cargo command is: ${{ env.CARGO }}"
78+
echo "target flag is: ${{ env.TARGET_FLAGS }}"
79+
echo "target dir is: ${{ env.TARGET_DIR }}"
80+
81+
- name: Build release binary
82+
run: ${{ env.CARGO }} build --verbose --release ${{ env.TARGET_FLAGS }}
83+
84+
- name: Strip release binary (linux and macos)
85+
if: matrix.build == 'linux' || matrix.build == 'macos'
86+
run: strip "target/${{ matrix.target }}/release/findlargedir"
87+
88+
- name: Strip release binary (arm)
89+
if: matrix.build == 'linux-arm'
90+
run: |
91+
docker run --rm -v \
92+
"$PWD/target:/target:Z" \
93+
rustembedded/cross:arm-unknown-linux-gnueabihf \
94+
arm-linux-gnueabihf-strip \
95+
/target/arm-unknown-linux-gnueabihf/release/findlargedir
96+
97+
- name: Build archive
  shell: bash
  run: |
    # Name of both the staging directory and the resulting archive.
    staging="findlargedir-${{ needs.create-release.outputs.fl_version }}-${{ matrix.target }}"
    # The staging directory must exist before anything is copied into it.
    mkdir -p "$staging"
    cp {README.md,LICENSE} "$staging/"
    # Same binary path the strip steps operate on.
    cp "target/${{ matrix.target }}/release/findlargedir" "$staging/"
    tar czf "$staging.tar.gz" "$staging"
    # The upload step reads env.ASSET for both asset_path and asset_name.
    echo "ASSET=$staging.tar.gz" >> $GITHUB_ENV
103+
104+
- name: Upload release archive
105+
uses: actions/[email protected]
106+
env:
107+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
108+
with:
109+
upload_url: ${{ needs.create-release.outputs.upload_url }}
110+
asset_path: ${{ env.ASSET }}
111+
asset_name: ${{ env.ASSET }}
112+
asset_content_type: application/octet-stream

.gitignore

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
### Rust ###
2+
# Generated by Cargo
3+
# will have compiled files and executables
4+
debug/
5+
target/
6+
7+
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
8+
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
9+
Cargo.lock
10+
11+
# These are backup files generated by rustfmt
12+
**/*.rs.bk
13+
14+
# MSVC Windows builds of rustc generate these, which store debugging information
15+
*.pdb
16+
17+
# IntelliJ tools
18+
.idea/
19+
20+
# End of https://www.toptal.com/developers/gitignore/api/rust

Cargo.toml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
[package]
2+
name = "findlargedir"
3+
version = "0.1.0"
4+
authors = ["Dinko Korunic <[email protected]>"]
5+
categories = ["command-line-utilities"]
6+
description = "find all blackhole directories with a huge amount of filesystem entries in a flat structure"
7+
repository = "https://github.com/dkorunic/findlargedir"
8+
readme = "README.md"
9+
license = "MIT"
10+
exclude = [".gitignore"]
11+
edition = "2021"
12+
13+
[dependencies]
14+
jwalk = "0.6.0"
15+
rayon = "1.5.3"
16+
tikv-jemallocator = "0.5.0"
17+
num_cpus = "1.13.1"
18+
tempfile = "3.3.0"
19+
spinach = "2.1.0"
20+
anyhow = "1.0.65"
21+
human_format = "1.0.3"
22+
human_bytes = { version = "0.3.1", features = ["fast"] }
23+
clap = { version = "3.2.22", features = ["derive"] }
24+
ctrlc = { version = "3.2.3", features = ["termination"] }
25+
rm_rf = "0.6.2"
26+
ansi_term = "0.12.1"
27+
28+
[profile.release]
29+
lto = true

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2022 Dinko Korunic
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
THE SOFTWARE.

README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
findlargedir
2+
===
3+
4+
[![GitHub license](https://img.shields.io/github/license/dkorunic/findlargedir.svg)](https://github.com/dkorunic/findlargedir/blob/master/LICENSE)
5+
[![GitHub release](https://img.shields.io/github/release/dkorunic/findlargedir.svg)](https://github.com/dkorunic/findlargedir/releases/latest)
6+
7+
## About
8+
9+
Findlargedir is a quick hack intended to help identify "black hole" directories on any filesystem, i.e. directories having more than 100,000 entries in a single flat structure. The program will attempt to identify any number of such directories and report on them.
10+
11+
Program will **not follow symlinks** and **requires r/w permissions** to be able to calculate a directory inode size to number of entries ratio and estimate a number of entries in a directory without actually counting them. While this method is just an approximation of the actual number of entries in a directory, it is good enough to quickly scan for offending directories.
12+
13+
## Caveats
14+
15+
* requires r/w privileges for each filesystem being tested; it will also create a temporary directory with a lot of temporary files, which are cleaned up afterwards
16+
* accurate mode (`-a`) can cause an excessive I/O and an excessive memory use; only use when appropriate
17+
18+
19+
## Usage
20+
21+
```shell
22+
USAGE:
23+
findlargedir [OPTIONS] <PATH>...
24+
25+
ARGS:
26+
<PATH>...
27+
28+
OPTIONS:
29+
-a, --accurate <accurate> [default: false]
30+
-A, --alert-threshold <ALERT_THRESHOLD> [default: 10000]
31+
-B, --blacklist-threshold <BLACKLIST_THRESHOLD> [default: 100000]
32+
-c, --calibration-count <CALIBRATION_COUNT> [default: 10000]
33+
-h, --help Print help information
34+
-o, --one-filesystem <one-filesystem> [default: true]
35+
-V, --version Print version information
36+
```
37+
38+
When using **accurate mode** (`-a` parameter) beware that large directory lookups will stall the process completely for extended periods of time. What this mode does is basically a secondary fully accurate pass on a possibly offending directory calculating exact number of entries.
39+
40+
If you want to avoid descending into mounted filesystems (as with the `find -xdev` option), use **one-filesystem mode** with the `-o` parameter; this mode is enabled by default.

src/calibrate.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
use anyhow::{Context, Error};
2+
use rm_rf::ensure_removed;
3+
use spinach::Spinach;
4+
use std::fs;
5+
use std::fs::File;
6+
use std::os::unix::fs::MetadataExt;
7+
use std::path::Path;
8+
use std::process;
9+
use std::sync::atomic::{AtomicBool, Ordering};
10+
use std::sync::Arc;
11+
12+
pub const DEFAULT_TEST_COUNT: u64 = 10000;
13+
const ERROR_EXIT: i32 = 1;
14+
15+
pub fn get_inode_ratio(
16+
test_path: &Path,
17+
shutdown: &Arc<AtomicBool>,
18+
test_count: u64,
19+
) -> Result<u64, Error> {
20+
println!(
21+
"Running test directory calibration in: {}",
22+
test_path.display(),
23+
);
24+
25+
let s = Spinach::new("Starting calibration...");
26+
27+
for i in 0..test_count {
28+
if shutdown.load(Ordering::Relaxed) {
29+
s.stop();
30+
println!("Requested program exit, stopping and deleting temporary files...",);
31+
ensure_removed(test_path)
32+
.expect("Unable to completely delete calibration directory, exiting");
33+
process::exit(ERROR_EXIT);
34+
}
35+
36+
File::create(test_path.join(i.to_string()))
37+
.with_context(|| format!("Unable to create calibration test file {}", i))?;
38+
if i % 1000 == 0 {
39+
s.text(format!("Created {} files...", i));
40+
}
41+
}
42+
43+
s.text("Done, getting total size and deleting temp folder");
44+
45+
let tmp_dir_size = fs::metadata(test_path)
46+
.with_context(|| format!("Unable to stat {} directory", test_path.display()))?
47+
.size();
48+
49+
s.succeed("Finished with calibration.");
50+
51+
let size_inode_ratio = tmp_dir_size / test_count;
52+
println!("Calculated size-to-inode ratio: {}", size_inode_ratio);
53+
54+
Ok(size_inode_ratio)
55+
}

src/interrupt.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
use anyhow::{Context, Error};
2+
use std::sync::atomic::{AtomicBool, Ordering};
3+
use std::sync::Arc;
4+
5+
pub fn setup_interrupt_handler(shutdown: Arc<AtomicBool>) -> Result<(), Error> {
6+
ctrlc::set_handler(move || {
7+
shutdown.store(true, Ordering::Relaxed);
8+
})
9+
.context("Unable to set Ctrl-C handler")?;
10+
11+
Ok(())
12+
}

src/main.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#![warn(clippy::all, clippy::pedantic)]
2+
mod calibrate;
3+
mod interrupt;
4+
mod walk;
5+
6+
use anyhow::{Context, Error, Result};
7+
use clap::Parser;
8+
use std::collections::HashSet;
9+
use std::fs;
10+
use std::sync::atomic::AtomicBool;
11+
use std::sync::Arc;
12+
use tempfile::TempDir;
13+
use tikv_jemallocator::Jemalloc;
14+
15+
#[global_allocator]
16+
static GLOBAL: Jemalloc = Jemalloc;
17+
18+
// Command-line arguments, parsed by clap's derive API.
// Plain `//` comments are used on purpose: `///` doc comments on a clap
// derive struct are picked up as help text and would change the CLI output.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
    // Number of files passed to `calibrate::get_inode_ratio` as `test_count`.
    #[clap(short, long, default_value_t = calibrate::DEFAULT_TEST_COUNT)]
    calibration_count: u64,

    // Forwarded unchanged to `walk::parallel_search`.
    #[clap(short, long, default_value_t = false)]
    accurate: bool,

    // Forwarded unchanged to `walk::parallel_search`.
    // NOTE(review): defaults to true; confirm the flag can actually be turned
    // off from the command line with this clap version.
    #[clap(short, long, default_value_t = true)]
    one_filesystem: bool,

    // Forwarded unchanged to `walk::parallel_search`; `-A` on the command line.
    #[clap(short = 'A', long, default_value_t = walk::ALERT_COUNT)]
    alert_threshold: u64,

    // Forwarded unchanged to `walk::parallel_search`; `-B` on the command line.
    #[clap(short = 'B', long, default_value_t = walk::BLACKLIST_COUNT)]
    blacklist_threshold: u64,

    // One or more paths to calibrate and scan; at least one is required.
    #[clap(required = true, allow_hyphen_values = true)]
    path: Vec<String>,
}
39+
40+
fn main() -> Result<(), Error> {
41+
let args = Args::parse();
42+
43+
let shutdown = Arc::new(AtomicBool::new(false));
44+
let shutdown_calibrate = shutdown.clone();
45+
interrupt::setup_interrupt_handler(shutdown)?;
46+
47+
let mut visited_paths = HashSet::new();
48+
49+
for path in args.path {
50+
match visited_paths.get(&path) {
51+
None => visited_paths.insert(path.clone()),
52+
_ => continue,
53+
};
54+
55+
let path_metadata =
56+
fs::metadata(&path).with_context(|| format!("Unable to stat {} directory", &path))?;
57+
58+
let tmp_dir = Arc::new(
59+
TempDir::new_in(&path).context("Unable to setup/create calibration test directory")?,
60+
);
61+
62+
let size_inode_ratio =
63+
calibrate::get_inode_ratio(tmp_dir.path(), &shutdown_calibrate, args.calibration_count)
64+
.context("Unable to calibrate inode to size ratio")?;
65+
drop(tmp_dir);
66+
67+
println!("Scanning filesystem path {} started", &path);
68+
69+
walk::parallel_search(
70+
&path,
71+
path_metadata,
72+
size_inode_ratio,
73+
args.accurate,
74+
args.one_filesystem,
75+
args.alert_threshold,
76+
args.blacklist_threshold,
77+
);
78+
79+
println!("Scanning filesystem path {} completed", &path);
80+
}
81+
82+
Ok(())
83+
}

0 commit comments

Comments
 (0)