fix: harden for large files (#643)

Erik Scholz 2025-09-14 06:44:19 +02:00 committed by GitHub
parent c607fc3ed4
commit 9e7befa320
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 4 deletions

@@ -107,7 +107,7 @@ const char* unused_tensors[] = {
 };
 
 bool is_unused_tensor(std::string name) {
-    for (int i = 0; i < sizeof(unused_tensors) / sizeof(const char*); i++) {
+    for (size_t i = 0; i < sizeof(unused_tensors) / sizeof(const char*); i++) {
         if (starts_with(name, unused_tensors[i])) {
             return true;
         }

@@ -119,7 +119,7 @@ struct TensorStorage {
     size_t file_index = 0;
     int index_in_zip = -1; // >= means stored in a zip file
-    size_t offset = 0; // offset in file
+    uint64_t offset = 0; // offset in file
 
     TensorStorage() = default;
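
Presumably the motivation for the uint64_t offset (an inference from the commit title, not stated in the diff): on targets where size_t is 32 bits, a byte offset beyond 4 GiB no longer fits, so a large checkpoint would be read from a wrapped-around position. A small sketch with made-up numbers:

// Illustration only; the 5 GiB value is arbitrary.
#include <cstdint>
#include <cstdio>

int main() {
    uint64_t offset    = 5ULL * 1024 * 1024 * 1024;  // 5 GiB into a large model file
    uint32_t truncated = (uint32_t)offset;           // what a 32-bit size_t would keep
    std::printf("64-bit offset: %llu\n", (unsigned long long)offset);
    std::printf("truncated:     %u\n", (unsigned)truncated);
    return 0;
}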
@@ -164,10 +164,10 @@ struct TensorStorage {
     std::vector<TensorStorage> chunk(size_t n) {
         std::vector<TensorStorage> chunks;
-        size_t chunk_size = nbytes_to_read() / n;
+        uint64_t chunk_size = nbytes_to_read() / n;
         // printf("%d/%d\n", chunk_size, nbytes_to_read());
         reverse_ne();
-        for (int i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             TensorStorage chunk_i = *this;
             chunk_i.ne[0] = ne[0] / n;
             chunk_i.offset = offset + i * chunk_size;
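
The last hunk follows the same reasoning for the chunking arithmetic: computing nbytes_to_read() / n in 64 bits keeps chunk offsets exact for payloads above 4 GiB, and a size_t loop index matches the type of n. A self-contained sketch with hypothetical sizes:

// Hypothetical sizes; mirrors the arithmetic, not the real TensorStorage code.
#include <cstdint>
#include <cstdio>

int main() {
    uint64_t nbytes     = 6ULL * 1024 * 1024 * 1024;  // pretend 6 GiB of tensor data
    size_t   n          = 4;                          // number of chunks requested
    uint64_t chunk_size = nbytes / n;                 // 64-bit division, no truncation

    for (size_t i = 0; i < n; i++) {                  // unsigned index, same type as n
        uint64_t chunk_offset = i * chunk_size;       // product computed as uint64_t
        std::printf("chunk %zu starts at byte %llu\n", i, (unsigned long long)chunk_offset);
    }
    return 0;
}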