#define WIN32_LEAN_AND_MEAN #define _WIN32_WINNT 0x0600 #include #include #include #include typedef std::vector FileSizeVec; SRWLOCK g_srwLock; DWORD g_outstandingRequests = 0; ULONGLONG g_largestFileSize = 0; std::wstring g_largestFile; struct ThreadParams { std::wstring dir; FileSizeVec& fileSizes; ThreadParams(const std::wstring& dir, FileSizeVec& fileSizes) : dir(dir), fileSizes(fileSizes) {} }; DWORD WINAPI UserCallback(PVOID pParams); void IterateDir(const std::wstring& root, FileSizeVec& fileSizes) { FileSizeVec localSizeVec; std::wstring fileSpec = root + L'*'; WIN32_FIND_DATA wfd = {0}; HANDLE hDirEntry = FindFirstFile(fileSpec.c_str(), &wfd); ULONGLONG biggestFileSizeSoFar = 0; std::wstring biggestFileName; if(hDirEntry == INVALID_HANDLE_VALUE) { wprintf(L"Failed to start iterating %s because of error %lu\n", fileSpec.c_str(), GetLastError()); return; } do { if(wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { if(wfd.cFileName[0] == L'.' && (wfd.cFileName[1] == 0 || (wfd.cFileName[1] == L'.' && wfd.cFileName[2] == 0))) { continue; } std::wstring nextRoot = root + wfd.cFileName; nextRoot += L'\\'; ThreadParams* pTp = new(std::nothrow) ThreadParams(nextRoot, fileSizes); if(pTp) { _InterlockedIncrement((volatile LONG*)&g_outstandingRequests); QueueUserWorkItem(&UserCallback, pTp, WT_EXECUTEDEFAULT); } else { wprintf(L"Failed to allocate memory for threadparams for root %s\n", nextRoot.c_str()); } } else { ULONGLONG fileSize = wfd.nFileSizeHigh; fileSize <<= 32; fileSize |= wfd.nFileSizeLow; localSizeVec.push_back(fileSize); if(fileSize == 0) { wprintf(L"Zero byte size file %s%s\n", root.c_str(), wfd.cFileName); } if(biggestFileSizeSoFar < fileSize) { biggestFileSizeSoFar = fileSize; biggestFileName = root + wfd.cFileName; } } } while(FindNextFile(hDirEntry, &wfd)); DWORD err = GetLastError(); FindClose(hDirEntry); bool ret = (err == ERROR_NO_MORE_FILES); if(!ret) { wprintf(L"Failed to enumerate all files with pattern %s because of error %lu\n", fileSpec.c_str(), err); } AcquireSRWLockExclusive(&g_srwLock); if(g_largestFileSize < biggestFileSizeSoFar) { g_largestFileSize = biggestFileSizeSoFar; g_largestFile = biggestFileName; } fileSizes.insert(fileSizes.end(), localSizeVec.begin(), localSizeVec.end()); ReleaseSRWLockExclusive(&g_srwLock); } DWORD WINAPI UserCallback(PVOID pParams) { ThreadParams* pTp = static_cast(pParams); IterateDir(pTp->dir, pTp->fileSizes); _InterlockedDecrement((volatile LONG*)&g_outstandingRequests); delete pTp; return 0; } #include #include int __cdecl wmain(int argc, wchar_t** argv) { if(argc < 2) { puts("Usage: FileSizeStats "); return 0; } InitializeSRWLock(&g_srwLock); FileSizeVec fileSizes; fileSizes.reserve(500000); std::wstring rootDir = argv[1]; rootDir += L'\\'; IterateDir(rootDir, fileSizes); while(_InterlockedCompareExchange((volatile long*)&g_outstandingRequests, 0, 0) != 0) { Sleep(1000); } size_t numFiles = fileSizes.size(); // 0 < 25K // 1 25K < 50K // 1 50K < 100K // 2 100K < 400K // 3 400K < 800K // 4 800K < 2MB // 5 2MB < 5MB // 6 5MB < 10MB // 7 10MB < 50MB // 8 50MB < 100MB // 9 100MB+ #define KB(x) ((x) * 1024) #define MB(x) ((x) * 1024 * 1024) ULONG buckets[11] = {0}; for(FileSizeVec::const_iterator iter = fileSizes.begin(), end = fileSizes.end(); iter != end; ++iter ) { const ULONGLONG size = *iter; if(size <= KB(25)) { ++buckets[0]; } else if(size < KB(50)) { ++buckets[1]; } else if(size < KB(100)) { ++buckets[2]; } else if(size < KB(400)) { ++buckets[3]; } else if(size < KB(800)) { ++buckets[4]; } else if(size < MB(2)) { ++buckets[5]; } else if(size < MB(5)) { ++buckets[6]; } else if(size < MB(10)) { ++buckets[7]; } else if(size < MB(50)) { ++buckets[8]; } else if(size < MB(100)) { ++buckets[9]; } else { ++buckets[10]; } } for(int i = 0; i < ARRAYSIZE(buckets); ++i) { printf("Bucket[%i] = %i (%.2f%%)\n", i, buckets[i], (float(buckets[i]) / numFiles) * 100); } ULONGLONG totalSize = std::accumulate(fileSizes.begin(), fileSizes.end(), 0ULL); WCHAR totalBuf[25] = {0}, averageBuf[25] = {0}, largestFileBuf[25] = {0}; StrFormatByteSizeW((LONGLONG)totalSize, totalBuf, ARRAYSIZE(totalBuf)); ULONGLONG average = totalSize / numFiles; StrFormatByteSizeW((LONGLONG)average, averageBuf, ARRAYSIZE(averageBuf)); StrFormatByteSizeW((LONGLONG)g_largestFileSize, largestFileBuf, ARRAYSIZE(largestFileBuf)); fileSizes.erase(std::remove(fileSizes.begin(), fileSizes.end(), 0), fileSizes.end()); size_t numFilesNonZero = fileSizes.size(); ULONGLONG averageNonZeroSize = totalSize / numFilesNonZero; WCHAR averageNonZeroBuf[25] = {0}; StrFormatByteSizeW((LONGLONG)averageNonZeroSize, averageNonZeroBuf, ARRAYSIZE(averageNonZeroBuf)); wprintf( L"Total size = %s (%I64u) in %Iu files\nAverage File Size = %s (%I64u)\n" L"Largest File = %s (%s - %I64u)\n" L"Total non-zero file = %Iu\nAverage Non-Zero File Size = %s (%I64u)\n", totalBuf, totalSize, numFiles, averageBuf, average, g_largestFile.c_str(), largestFileBuf, g_largestFileSize, numFilesNonZero, averageNonZeroBuf, averageNonZeroSize ); return 0; }