Skip to content
Snippets Groups Projects
Commit 801db124 authored by Joachim Meyer's avatar Joachim Meyer
Browse files

Merge branch 'file_explore' into 'main'

directory info

See merge request !1
parents 7cf6d56c 79f6a816
Branches
No related tags found
1 merge request!1directory info
Use `run_tree` to save the output of `tree -J` for each directory in /home.
Use `file_count.py` to loop through the JSON files produced by `run_tree` and attach a `files_count` item to every directory entry in them.
# Import required modules
import json
import re
import os
# Custom JSON decoder to handle string replacement for escaped characters
class LazyDecoder(json.JSONDecoder):
    """JSON decoder that patches up slightly malformed input before parsing.

    Two textual repairs are applied to the raw string, in order:

    1. A single backslash sitting between two non-backslash characters is
       doubled, so it is parsed as a literal backslash.
    2. A trailing comma directly before a closing ``]`` (optionally separated
       by whitespace) is removed.

    NOTE: repair 1 is purely textual and does not distinguish valid JSON
    escape sequences from stray backslashes; every lone backslash is doubled.
    """

    # Compiled once when the class is created rather than on every decode().
    _REPAIRS = (
        (re.compile(r'([^\\])\\([^\\])'), r'\1\\\\\2'),
        (re.compile(r',(\s*])'), r'\1'),
    )

    def decode(self, s, **kwargs):
        """Repair *s* and decode it with the standard JSON decoder.

        Args:
            s: The JSON document as a string.
            **kwargs: Passed through unchanged to ``JSONDecoder.decode``.

        Returns:
            The Python object decoded from the repaired document.

        Raises:
            json.JSONDecodeError: If the document is still invalid after the
                repairs have been applied.
        """
        for pattern, replacement in self._REPAIRS:
            s = pattern.sub(replacement, s)
        return super().decode(s, **kwargs)
# Histogram filled in by count_files():
# maps "number of direct children of a directory" -> "how many directories
# with that many children were seen".
counter_dict = {}


def count_files(node):
    """Count the non-directory entries below *node* and annotate directories.

    Every directory node visited gets a ``"files_count"`` key holding the
    number of non-directory entries found anywhere beneath it, and
    ``counter_dict`` is updated with the number of direct children of that
    directory.

    Args:
        node: A mapping describing one entry of a directory tree. A node is
            treated as a directory when its ``"type"`` is ``"directory"``;
            its children are read from the ``"contents"`` list.

    Returns:
        ``1`` for any node that is not a directory, otherwise the total
        number of non-directory entries in the directory's subtree.
    """
    if "type" not in node or node["type"] != "directory":
        return 1

    # A directory entry may carry no "contents" key at all (e.g. an empty
    # directory); treat that as zero children instead of raising KeyError.
    children = node.get("contents", [])
    node["files_count"] = sum(count_files(child) for child in children)

    child_count = len(children)
    counter_dict[child_count] = counter_dict.get(child_count, 0) + 1
    return node["files_count"]
# Every entry in tree_output/ is expected to be one JSON dump.
user_files = os.listdir('tree_output/')

for user in user_files:
    dump_path = os.path.join('tree_output', user)

    # Parse with LazyDecoder so slightly malformed dumps still load; the
    # context manager guarantees the handle is closed before we rewrite it.
    with open(dump_path) as f:
        data = json.load(f, cls=LazyDecoder)

    # Annotate each top-level entry (and, recursively, its directories).
    for entry in data:
        count_files(entry)

    # Overwrite the dump in place with the annotated tree.
    with open(dump_path, 'w') as f:
        json.dump(data, f)

# Written once, after all dumps have been processed, so the histogram
# covers every file in tree_output/.
with open('folder_count.json', 'w') as f:
    json.dump(counter_dict, f)
directory-exploration/folder-folder_count.png

224 KiB

import json
import pandas as pd
from collections import OrderedDict

# The script uses `plt` and `sns` below; without these imports it fails with
# NameError when run as a standalone file.
import matplotlib.pyplot as plt
import seaborn as sns

# Load the histogram from folder_count.json. JSON object keys are always
# strings, so convert both keys and values back to integers.
with open('folder_count.json') as f:
    data = json.load(f)
data = {int(k): int(v) for k, v in data.items()}

# One row per distinct key, ordered by that key for plotting.
count_data = pd.DataFrame(list(data.items()), columns=['folders', 'folder_count'])
count_data = count_data.sort_values(by='folders')

sns.set_style("darkgrid", {"axes.facecolor": ".9"})
sns.set(rc={'figure.figsize': (11.7, 8.27)})

# Scatter plot with a fitted regression line (lmplot), points drawn in black.
sns.lmplot(x='folders', y='folder_count', data=count_data, height=5, aspect=2, scatter_kws={'color': 'black'})

# Add the title, x-axis label, and y-axis label
plt.title("folder-folder_count")
plt.xlabel("folder")
plt.ylabel("log(ls count)")
plt.yscale('log')

# Zoom in/out: restrict the x-axis to the range 0..4000.
plt.xlim(0, 4000)

# Save the figure to disk, then display it.
plt.savefig("folder-folder_count.png", dpi=300, bbox_inches='tight')
plt.show()
# Creates the tree_output directory and saves the `tree -J` output for each
# directory in /home as tree_output/<name>.json.

# -p: do not fail when the directory already exists from a previous run.
mkdir -p tree_output
# Abort rather than write the dumps into the wrong directory.
cd tree_output || exit 1

# Glob instead of parsing `ls`, and quote every expansion, so names that
# contain whitespace or glob characters are handled correctly.
for dir in /home/*/
do
    # If /home has no subdirectories the pattern stays unexpanded; skip it.
    [ -d "$dir" ] || continue
    name=$(basename "$dir")
    sudo tree -J "$dir" > "$name.json"
done
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment