Stroika Library 3.0d20
 
Loading...
Searching...
No Matches
Archive/Zip/Reader.cpp
1/*
2 * Copyright(c) Sophist Solutions, Inc. 1990-2025. All rights reserved
3 */
4#include "Stroika/Foundation/StroikaPreComp.h"
5
6#if qStroika_HasComponent_zlib
7#include <zlib.h>
8#endif
9
11#include "Stroika/Foundation/DataExchange/Archive/Zip/Private_minizip_.h"
13#include "Stroika/Foundation/Streams/MemoryStream.h"
14
15#include "Reader.h"
16
17using namespace Stroika::Foundation;
21using namespace Stroika::Foundation::DataExchange::Archive;
22using namespace Stroika::Foundation::Execution;
23using namespace Stroika::Foundation::Streams;
24
25using std::byte;
26
27#if qStroika_HasComponent_zlib
28using namespace Stroika::Foundation::DataExchange::Archive::Zip::PrivateMinizip_;
29
30namespace {
31 void ThrowIfMinizipErr_ (int err, const String& doing)
32 {
33 if (err != UNZ_OK) [[unlikely]] {
34 Throw (RuntimeErrorException{Format ("error {} with zipfile in {}"_f, err, doing)});
35 }
36 }
37}
38
39namespace {
40 struct MyZipLibInStream_ final : zlib_filefunc64_def {
41 InputStream::Ptr<byte> fInStream_;
42#if qStroika_Foundation_Debug_AssertionsChecked
43 bool fOpened_{false};
44#endif
45 MyZipLibInStream_ (const InputStream::Ptr<byte>& in)
46 : fInStream_{in}
47 {
48 this->zopen64_file = [] (voidpf opaqueStream, const void* /*filename*/, int /*mode*/) -> voidpf {
49 MyZipLibInStream_* myThis = reinterpret_cast<MyZipLibInStream_*> (opaqueStream);
50#if qStroika_Foundation_Debug_AssertionsChecked
51 Assert (not myThis->fOpened_);
52 myThis->fOpened_ = true;
53#endif
54 return myThis;
55 };
56 this->zread_file = [] (voidpf opaqueStream, [[maybe_unused]] voidpf stream, void* buf, uLong size) -> uLong {
57 Require (opaqueStream == stream); // our use is one stream per zlib_filefunc64_def object
58 MyZipLibInStream_* myThis = reinterpret_cast<MyZipLibInStream_*> (opaqueStream);
59#if qStroika_Foundation_Debug_AssertionsChecked
60 Assert (myThis->fOpened_);
61#endif
62 size_t sz = myThis->fInStream_.ReadBlocking (span{reinterpret_cast<byte*> (buf), size}).size ();
63 Assert (sz <= size);
64 return static_cast<uLong> (sz);
65 };
66 this->zwrite_file = [] (voidpf /*opaque*/, voidpf /*stream*/, const void* /*buf*/, uLong /*size*/) -> uLong {
67 RequireNotReached (); // read only zip
68 return static_cast<uLong> (UNZ_PARAMERROR);
69 };
70 this->ztell64_file = [] (voidpf opaqueStream, [[maybe_unused]] voidpf stream) -> ZPOS64_T {
71 Require (opaqueStream == stream); // our use is one stream per zlib_filefunc64_def object
72 MyZipLibInStream_* myThis = reinterpret_cast<MyZipLibInStream_*> (opaqueStream);
73#if qStroika_Foundation_Debug_AssertionsChecked
74 Assert (myThis->fOpened_);
75#endif
76 return myThis->fInStream_.GetOffset ();
77 };
78 this->zseek64_file = [] (voidpf opaqueStream, [[maybe_unused]] voidpf stream, ZPOS64_T offset, int origin) -> long {
79 Require (opaqueStream == stream); // our use is one stream per zlib_filefunc64_def object
80 MyZipLibInStream_* myThis = reinterpret_cast<MyZipLibInStream_*> (opaqueStream);
81#if qStroika_Foundation_Debug_AssertionsChecked
82 Assert (myThis->fOpened_);
83#endif
84 switch (origin) {
85 case ZLIB_FILEFUNC_SEEK_SET:
86 myThis->fInStream_.Seek (offset);
87 break;
88 case ZLIB_FILEFUNC_SEEK_CUR:
89 myThis->fInStream_.Seek (eFromCurrent, offset);
90 break;
91 case ZLIB_FILEFUNC_SEEK_END:
92 myThis->fInStream_.Seek (eFromEnd, offset);
93 break;
94 default:
96 return UNZ_PARAMERROR;
97 }
98 return UNZ_OK;
99 };
100 this->zclose_file = [] ([[maybe_unused]] voidpf opaqueStream, [[maybe_unused]] voidpf stream) -> int {
101#if qStroika_Foundation_Debug_AssertionsChecked
102 Require (opaqueStream == stream); // our use is one stream per zlib_filefunc64_def object
103 MyZipLibInStream_* myThis = reinterpret_cast<MyZipLibInStream_*> (opaqueStream);
104 Assert (myThis->fOpened_);
105 myThis->fOpened_ = false;
106#endif
107 return UNZ_OK;
108 };
109 this->zerror_file = [] (voidpf opaqueStream, [[maybe_unused]] voidpf stream) -> int {
110 Require (opaqueStream == stream); // our use is one stream per zlib_filefunc64_def object
111 [[maybe_unused]] MyZipLibInStream_* myThis = reinterpret_cast<MyZipLibInStream_*> (opaqueStream);
112#if qStroika_Foundation_Debug_AssertionsChecked
113 Assert (myThis->fOpened_);
114#endif
115 return UNZ_OK; // @todo - see what this means?
116 };
117 this->opaque = this;
118 }
119 ~MyZipLibInStream_ ()
120 {
121#if qStroika_Foundation_Debug_AssertionsChecked
122 Assert (not fOpened_);
123#endif
124 }
125 };
126}
127
128namespace {
129
130 struct Rep_ final : public Reader::IRep {
131 MyZipLibInStream_ fInSeekStream_;
132 unzFile fZipFile_;
133
134 Rep_ (const InputStream::Ptr<byte>& in)
135 : fInSeekStream_{in}
136 , fZipFile_{unzOpen2_64 ("", &fInSeekStream_)}
137 {
138 if (fZipFile_ == nullptr) [[unlikely]] {
139 static const RuntimeErrorException kException_{"failed to open zipfile"sv};
140 Throw (kException_);
141 }
142 }
143 ~Rep_ ()
144 {
145 AssertNotNull (fZipFile_);
146 unzClose (fZipFile_);
147 }
148 virtual Set<String> GetContainedFiles () const override
149 {
150 Set<String> result;
151 unz_global_info64 gi;
152 ThrowIfMinizipErr_ (unzGetGlobalInfo64 (fZipFile_, &gi), "unzGetGlobalInfo64"sv);
153 for (size_t i = 0; i < gi.number_entry; i++) {
154 char filename_inzip[10 * 1024];
155 unz_file_info64 file_info;
156 //uLong ratio = 0;
157 //const char* string_method;
158 //char charCrypt = ' ';
159 ThrowIfMinizipErr_ (::unzGetCurrentFileInfo64 (fZipFile_, &file_info, filename_inzip, sizeof (filename_inzip), NULL, 0, NULL, 0),
160 "unzGetGlobalInfo64"sv);
161 if ((i + 1) < gi.number_entry) {
162 ThrowIfMinizipErr_ (::unzGoToNextFile_ (fZipFile_), "unzGoToNextFile_"sv);
163 }
164 //tmphack
165 if (filename_inzip[::strlen (filename_inzip) - 1] == '/') {
166 continue; // only list files - not directories for now
167 }
168 // From https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT, must check bit 11 to see if really unicode, but not sure what code page if not unicode so just assume unicode!
169 result.Add (String::FromUTF8 (filename_inzip));
170 }
171#if 0
172 // Keep temporarily, because we will want a traversal variant that captures this extra info
173 uLong i;
174 unz_global_info64 gi;
175 int err;
176
177 err = unzGetGlobalInfo64(uf, &gi);
178 if (err != UNZ_OK)
179 printf("error %d with zipfile in unzGetGlobalInfo \n", err);
180 printf(" Length Method Size Ratio Date Time CRC-32 Name\n");
181 printf(" ------ ------ ---- ----- ---- ---- ------ ----\n");
182 for (i = 0; i < gi.number_entry; ++i) {
183 char filename_inzip[256];
184 unz_file_info64 file_info;
185 uLong ratio = 0;
186 const char* string_method;
187 char charCrypt = ' ';
188 err = unzGetCurrentFileInfo64(uf, &file_info, filename_inzip, sizeof(filename_inzip), NULL, 0, NULL, 0);
189 if (err != UNZ_OK) {
190 printf("error %d with zipfile in unzGetCurrentFileInfo\n", err);
191 break;
192 }
193 if (file_info.uncompressed_size > 0)
194 ratio = (uLong)((file_info.compressed_size * 100) / file_info.uncompressed_size);
195
196 /* display a '*' if the file is crypted */
197 if ((file_info.flag & 1) != 0)
198 charCrypt = '*';
199
200 if (file_info.compression_method == 0)
201 string_method = "Stored";
202 else if (file_info.compression_method == Z_DEFLATED) {
203 uInt iLevel = (uInt)((file_info.flag & 0x6) / 2);
204 if (iLevel == 0)
205 string_method = "Defl:N";
206 else if (iLevel == 1)
207 string_method = "Defl:X";
208 else if ((iLevel == 2) or (iLevel == 3))
209 string_method = "Defl:F"; /* 2:fast , 3 : extra fast*/
210 }
211 else if (file_info.compression_method == Z_BZIP2ED) {
212 string_method = "BZip2 ";
213 }
214 else
215 string_method = "Unkn. ";
216
217 Display64BitsSize(file_info.uncompressed_size, 7);
218 printf(" %6s%c", string_method, charCrypt);
219 Display64BitsSize(file_info.compressed_size, 7);
220 printf(" %3lu%% %2.2lu-%2.2lu-%2.2lu %2.2lu:%2.2lu %8.8lx %s\n",
221 ratio,
222 (uLong)file_info.tmu_date.tm_mon + 1,
223 (uLong)file_info.tmu_date.tm_mday,
224 (uLong)file_info.tmu_date.tm_year % 100,
225 (uLong)file_info.tmu_date.tm_hour, (uLong)file_info.tmu_date.tm_min,
226 (uLong)file_info.crc, filename_inzip);
227 if ((i + 1) < gi.number_entry) {
228 err = unzGoToNextFile_ (uf);
229 if (err != UNZ_OK) {
230 printf("error %d with zipfile in unzGoToNextFile\n", err);
231 break;
232 }
233 }
234 }
235
236 for (unsigned int i = 0; i < fDB_.NumFiles; i++) {
237 if (not SzArEx_IsDir (&fDB_, i)) {
238 size_t nameLen = ::SzArEx_GetFileNameUtf16 (&fDB_, i, nullptr);
239 if (nameLen < 1) {
240 break;
241 }
242 Memory::StackBuffer<char16_t> fileName {Memory::eUninitiialized, nameLen};
243 size_t z = ::SzArEx_GetFileNameUtf16 (&fDB_, i, reinterpret_cast<UInt16*> (&fileName[0]));
244 result.Add (String{&fileName[0]});
245 }
246 }
247#endif
248 return result;
249 }
250 virtual Memory::BLOB GetData (const String& fileName) const override
251 {
252 // See comments in GetContainedFiles about filename character encoding
253 if (unzLocateFile_ (fZipFile_, fileName.AsUTF8<string> ().c_str (), 1) != UNZ_OK) [[unlikely]] {
254 Throw (RuntimeErrorException{Format ("File '{}' not found"_f, fileName)});
255 }
256 const char* password = nullptr;
257 int err = unzOpenCurrentFilePassword (fZipFile_, password);
258 [[maybe_unused]] auto&& cleanup = Finally ([this] () noexcept { unzCloseCurrentFile_ (fZipFile_); });
259 MemoryStream::Ptr<byte> tmpBuf = MemoryStream::New<byte> ();
260 do {
261 byte buf[10 * 1024];
262 err = unzReadCurrentFile_ (fZipFile_, buf, static_cast<unsigned int> (Memory::NEltsOf (buf)));
263 if (err < 0) [[unlikely]] {
264 Throw (RuntimeErrorException{Format (L"File '{}' error {} extracting"_f, fileName, err)});
265 }
266 else if (err > 0) {
267 Assert (static_cast<size_t> (err) <= Memory::NEltsOf (buf));
268 tmpBuf.Write (span{buf, static_cast<size_t> (err)});
269 }
270 } while (err > 0);
271 return tmpBuf.As<Memory::BLOB> ();
272 }
273 };
274}
275
276/*
277 ********************************************************************************
278 ***************** DataExchange::Archive::Zip::Reader::New **********************
279 ********************************************************************************
280 */
281Archive::Reader::Ptr Archive::Zip::Reader::New (const InputStream::Ptr<byte>& readFrom)
282{
283 return Archive::Reader::Ptr{make_shared<Rep_> (readFrom)};
284}
285#endif
#define AssertNotNull(p)
Definition Assertions.h:333
#define RequireNotReached()
Definition Assertions.h:385
#define AssertNotReached()
Definition Assertions.h:355
String is like std::u32string, except it is much easier to use, often much more space efficient,...
Definition String.h:201
Set<T> is a container of T, where once an item is added, additionally adds () do nothing.
nonvirtual void Add(ArgByValueType< value_type > item)
Definition Set.inl:138
Logically halfway between std::array and std::vector; Smart 'direct memory array' - which when needed...
InputStream<>::Ptr is Smart pointer (with abstract Rep) class defining the interface to reading from ...
void Throw(T &&e2Throw)
identical to builtin C++ 'throw' except that it does helpful, type dependent DbgTrace() messages firs...
Definition Throw.inl:43
auto Finally(FUNCTION &&f) -> Private_::FinallySentry< FUNCTION >
Definition Finally.inl:31