bindexplib.cxx 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
  2. file Copyright.txt or https://cmake.org/licensing for details. */
  3. /*-------------------------------------------------------------------------
  4. Portions of this source have been derived from the 'bindexplib' tool
  5. provided by the CERN ROOT Data Analysis Framework project (root.cern.ch).
  6. Permission has been granted by Pere Mato <[email protected]> to distribute
  7. this derived work under the CMake license.
  8. -------------------------------------------------------------------------*/
  9. /*
  10. *----------------------------------------------------------------------
  11. * Program: dumpexts.exe
  12. * Author: Gordon Chaffee
  13. *
  14. * History: The real functionality of this file was written by
  15. * Matt Pietrek in 1993 in his pedump utility. I've
  16. * modified it to dump the externals in a bunch of object
  17. * files to create a .def file.
  18. *
* Notes: Visual C++ puts an underscore before each exported symbol.
* This file removes them. I don't know if this is a problem
* with other compilers. If _MSC_VER is defined,
* the underscore is removed. If not, it isn't. To get a
* full dump of an object file, use the -f option. This can
* help determine whether something may be different with a
* compiler other than Visual C++.
  26. * ======================================
  27. * Corrections (Axel 2006-04-04):
  28. * Conversion to C++. Mostly.
  29. *
  30. * Extension (Axel 2006-03-15)
  31. * As soon as an object file contains an /EXPORT directive (which
  32. * is generated by the compiler when a symbol is declared as
  33. * __declspec(dllexport) no to-be-exported symbols are printed,
  34. * as the linker will see these directives, and if those directives
  35. * are present we only export selectively (i.e. we trust the
  36. * programmer).
  37. *
  38. * ======================================
  39. * ======================================
  40. * Corrections (Valery Fine 23/02/98):
  41. *
  42. * The "(vector) deleting destructor" MUST not be exported
  43. * To recognize it the following test are introduced:
  44. * "@@UAEPAXI@Z" scalar deleting dtor
  45. * "@@QAEPAXI@Z" vector deleting dtor
  46. * "AEPAXI@Z" vector deleting dtor with thunk adjustor
  47. * ======================================
  48. * Corrections (Valery Fine 12/02/97):
  49. *
  50. * It created a wrong EXPORTS for the global pointers and constants.
  51. * The Section Header has been involved to discover the missing information
  52. * Now the pointers are correctly supplied with "DATA" descriptor
  53. * the constants with no extra descriptor.
  54. *
  55. * Corrections (Valery Fine 16/09/96):
  56. *
  57. * It didn't work for C++ code with global variables and class definitions
  58. * The DumpExternalObject function has been introduced to generate .DEF
  59. *file
  60. *
  61. * Author: Valery Fine 16/09/96 (E-mail: [email protected])
  62. *----------------------------------------------------------------------
  63. */
  64. #include "bindexplib.h"
  65. #include <cstddef> // IWYU pragma: keep
  66. #include <sstream>
  67. #include <vector>
  68. #ifdef _WIN32
  69. # include <windows.h>
  70. # include "cmsys/Encoding.hxx"
  71. #endif
  72. #include "cmsys/FStream.hxx"
  73. #include "cmSystemTools.h"
  74. #ifdef _WIN32
  75. # ifndef IMAGE_FILE_MACHINE_ARM
  76. # define IMAGE_FILE_MACHINE_ARM 0x01c0 // ARM Little-Endian
  77. # endif
  78. # ifndef IMAGE_FILE_MACHINE_THUMB
  79. # define IMAGE_FILE_MACHINE_THUMB 0x01c2 // ARM Thumb/Thumb-2 Little-Endian
  80. # endif
  81. # ifndef IMAGE_FILE_MACHINE_ARMNT
  82. # define IMAGE_FILE_MACHINE_ARMNT 0x01c4 // ARM Thumb-2 Little-Endian
  83. # endif
  84. # ifndef IMAGE_FILE_MACHINE_ARM64
  85. # define IMAGE_FILE_MACHINE_ARM64 0xaa64 // ARM64 Little-Endian
  86. # endif
  87. # ifndef IMAGE_FILE_MACHINE_ARM64EC
  88. # define IMAGE_FILE_MACHINE_ARM64EC 0xa641 // ARM64EC Little-Endian
  89. # endif
  90. typedef struct cmANON_OBJECT_HEADER_BIGOBJ
  91. {
  92. /* same as ANON_OBJECT_HEADER_V2 */
  93. WORD Sig1; // Must be IMAGE_FILE_MACHINE_UNKNOWN
  94. WORD Sig2; // Must be 0xffff
  95. WORD Version; // >= 2 (implies the Flags field is present)
  96. WORD Machine; // Actual machine - IMAGE_FILE_MACHINE_xxx
  97. DWORD TimeDateStamp;
  98. CLSID ClassID; // {D1BAA1C7-BAEE-4ba9-AF20-FAF66AA4DCB8}
  99. DWORD SizeOfData; // Size of data that follows the header
  100. DWORD Flags; // 0x1 -> contains metadata
  101. DWORD MetaDataSize; // Size of CLR metadata
  102. DWORD MetaDataOffset; // Offset of CLR metadata
  103. /* bigobj specifics */
  104. DWORD NumberOfSections; // extended from WORD
  105. DWORD PointerToSymbolTable;
  106. DWORD NumberOfSymbols;
  107. } cmANON_OBJECT_HEADER_BIGOBJ;
  108. typedef struct _cmIMAGE_SYMBOL_EX
  109. {
  110. union
  111. {
  112. BYTE ShortName[8];
  113. struct
  114. {
  115. DWORD Short; // if 0, use LongName
  116. DWORD Long; // offset into string table
  117. } Name;
  118. DWORD LongName[2]; // PBYTE [2]
  119. } N;
  120. DWORD Value;
  121. LONG SectionNumber;
  122. WORD Type;
  123. BYTE StorageClass;
  124. BYTE NumberOfAuxSymbols;
  125. } cmIMAGE_SYMBOL_EX;
  126. typedef cmIMAGE_SYMBOL_EX UNALIGNED* cmPIMAGE_SYMBOL_EX;
  127. enum class Arch
  128. {
  129. Generic,
  130. I386,
  131. ARM64EC,
  132. };
  133. PIMAGE_SECTION_HEADER GetSectionHeaderOffset(
  134. PIMAGE_FILE_HEADER pImageFileHeader)
  135. {
  136. return (PIMAGE_SECTION_HEADER)((DWORD_PTR)pImageFileHeader +
  137. IMAGE_SIZEOF_FILE_HEADER +
  138. pImageFileHeader->SizeOfOptionalHeader);
  139. }
  140. PIMAGE_SECTION_HEADER GetSectionHeaderOffset(
  141. cmANON_OBJECT_HEADER_BIGOBJ* pImageFileHeader)
  142. {
  143. return (PIMAGE_SECTION_HEADER)((DWORD_PTR)pImageFileHeader +
  144. sizeof(cmANON_OBJECT_HEADER_BIGOBJ));
  145. }
  146. /*
  147. + * Utility func, strstr with size
  148. + */
  149. const char* StrNStr(const char* start, const char* find, size_t& size)
  150. {
  151. size_t len;
  152. const char* hint;
  153. if (!start || !find || !size) {
  154. size = 0;
  155. return 0;
  156. }
  157. len = strlen(find);
  158. while ((hint = (const char*)memchr(start, find[0], size - len + 1))) {
  159. size -= (hint - start);
  160. if (!strncmp(hint, find, len))
  161. return hint;
  162. start = hint + 1;
  163. }
  164. size = 0;
  165. return 0;
  166. }
  167. template <
  168. // cmANON_OBJECT_HEADER_BIGOBJ or IMAGE_FILE_HEADER
  169. class ObjectHeaderType,
  170. // cmPIMAGE_SYMBOL_EX or PIMAGE_SYMBOL
  171. class SymbolTableType>
  172. class DumpSymbols
  173. {
  174. public:
  175. /*
  176. *----------------------------------------------------------------------
  177. * Constructor --
  178. *
  179. * Initialize variables from pointer to object header.
  180. *
  181. *----------------------------------------------------------------------
  182. */
  183. DumpSymbols(ObjectHeaderType* ih, std::set<std::string>& symbols,
  184. std::set<std::string>& dataSymbols,
  185. Arch symbolArch = Arch::Generic)
  186. : Symbols(symbols)
  187. , DataSymbols(dataSymbols)
  188. {
  189. this->ObjectImageHeader = ih;
  190. this->SymbolTable =
  191. (SymbolTableType*)((DWORD_PTR)this->ObjectImageHeader +
  192. this->ObjectImageHeader->PointerToSymbolTable);
  193. this->SectionHeaders = GetSectionHeaderOffset(this->ObjectImageHeader);
  194. this->SymbolCount = this->ObjectImageHeader->NumberOfSymbols;
  195. this->SymbolArch = symbolArch;
  196. }
  197. /*
  198. *----------------------------------------------------------------------
  199. * DumpObjFile --
  200. *
  201. * Dump an object file's exported symbols.
  202. *----------------------------------------------------------------------
  203. */
  204. void DumpObjFile() { this->DumpExternalsObjects(); }
  205. /*
  206. *----------------------------------------------------------------------
  207. * DumpExternalsObjects --
  208. *
  209. * Dumps a COFF symbol table from an OBJ.
  210. *----------------------------------------------------------------------
  211. */
  212. void DumpExternalsObjects()
  213. {
  214. unsigned i;
  215. PSTR stringTable;
  216. std::string symbol;
  217. DWORD SectChar;
  218. /*
  219. * The string table apparently starts right after the symbol table
  220. */
  221. stringTable = (PSTR) & this->SymbolTable[this->SymbolCount];
  222. SymbolTableType* pSymbolTable = this->SymbolTable;
  223. for (i = 0; i < this->SymbolCount; i++) {
  224. if (pSymbolTable->SectionNumber > 0 &&
  225. (pSymbolTable->Type == 0x20 || pSymbolTable->Type == 0x0)) {
  226. if (pSymbolTable->StorageClass == IMAGE_SYM_CLASS_EXTERNAL) {
  227. /*
  228. * The name of the Function entry points
  229. */
  230. if (pSymbolTable->N.Name.Short != 0) {
  231. symbol.clear();
  232. symbol.insert(0, (const char*)pSymbolTable->N.ShortName, 8);
  233. } else {
  234. symbol = stringTable + pSymbolTable->N.Name.Long;
  235. }
  236. // clear out any leading spaces
  237. while (isspace(symbol[0]))
  238. symbol.erase(0, 1);
  239. // if it starts with _ and has an @ then it is a __cdecl
  240. // so remove the @ stuff for the export
  241. if (symbol[0] == '_') {
  242. std::string::size_type posAt = symbol.find('@');
  243. if (posAt != std::string::npos) {
  244. symbol.erase(posAt);
  245. }
  246. }
  247. // For i386 builds we need to remove _
  248. if (this->SymbolArch == Arch::I386 && symbol[0] == '_') {
  249. symbol.erase(0, 1);
  250. }
  251. // Check whether it is "Scalar deleting destructor" and "Vector
  252. // deleting destructor"
  253. // if scalarPrefix and vectorPrefix are not found then print
  254. // the symbol
  255. const char* scalarPrefix = "??_G";
  256. const char* vectorPrefix = "??_E";
  257. const char* vftablePrefix = "??_7";
  258. // The original code had a check for
  259. // symbol.find("real@") == std::string::npos)
  260. // but this disallows member functions with the name "real".
  261. if (symbol.compare(0, 4, scalarPrefix) &&
  262. symbol.compare(0, 4, vectorPrefix)) {
  263. SectChar = this->SectionHeaders[pSymbolTable->SectionNumber - 1]
  264. .Characteristics;
  265. // skip symbols containing a dot or are from managed code
  266. if (symbol.find('.') == std::string::npos &&
  267. !SymbolIsFromManagedCode(symbol)) {
  268. // skip arm64ec thunk symbols
  269. if (this->SymbolArch != Arch::ARM64EC ||
  270. (symbol.find("$ientry_thunk") == std::string::npos &&
  271. symbol.find("$entry_thunk") == std::string::npos &&
  272. symbol.find("$iexit_thunk") == std::string::npos &&
  273. symbol.find("$exit_thunk") == std::string::npos)) {
  274. if ((!pSymbolTable->Type &&
  275. // Read only (i.e. constants) must be excluded
  276. (SectChar & IMAGE_SCN_MEM_WRITE)) ||
  277. (this->SymbolArch == Arch::ARM64EC &&
  278. // vftable symbols are DATA on ARM64EC
  279. symbol.compare(0, 4, vftablePrefix) == 0)) {
  280. this->DataSymbols.insert(symbol);
  281. } else if (pSymbolTable->Type ||
  282. !(SectChar & IMAGE_SCN_MEM_READ) ||
  283. (SectChar & IMAGE_SCN_MEM_EXECUTE) ||
  284. (this->SymbolArch != Arch::ARM64EC &&
  285. // vftable symbols fail if marked as DATA
  286. symbol.compare(0, 4, vftablePrefix) == 0)) {
  287. this->Symbols.insert(symbol);
  288. }
  289. }
  290. }
  291. }
  292. }
  293. }
  294. /*
  295. * Take into account any aux symbols
  296. */
  297. i += pSymbolTable->NumberOfAuxSymbols;
  298. pSymbolTable += pSymbolTable->NumberOfAuxSymbols;
  299. pSymbolTable++;
  300. }
  301. }
  302. private:
  303. bool SymbolIsFromManagedCode(std::string const& symbol)
  304. {
  305. return symbol == "__t2m" || symbol == "__m2mep" || symbol == "__mep" ||
  306. symbol.find("$$F") != std::string::npos ||
  307. symbol.find("$$J") != std::string::npos;
  308. }
  309. std::set<std::string>& Symbols;
  310. std::set<std::string>& DataSymbols;
  311. DWORD_PTR SymbolCount;
  312. PIMAGE_SECTION_HEADER SectionHeaders;
  313. ObjectHeaderType* ObjectImageHeader;
  314. SymbolTableType* SymbolTable;
  315. Arch SymbolArch;
  316. };
  317. #endif
  318. static bool DumpFileWithLlvmNm(std::string const& nmPath, const char* filename,
  319. std::set<std::string>& symbols,
  320. std::set<std::string>& dataSymbols)
  321. {
  322. std::string output;
  323. // break up command line into a vector
  324. std::vector<std::string> command;
  325. command.push_back(nmPath);
  326. command.emplace_back("--no-weak");
  327. command.emplace_back("--defined-only");
  328. command.emplace_back("--format=posix");
  329. command.emplace_back(filename);
  330. // run the command
  331. int exit_code = 0;
  332. cmSystemTools::RunSingleCommand(command, &output, &output, &exit_code,
  333. nullptr, cmSystemTools::OUTPUT_NONE);
  334. if (exit_code != 0) {
  335. fprintf(stderr, "llvm-nm returned an error: %s\n", output.c_str());
  336. return false;
  337. }
  338. std::istringstream ss(output);
  339. std::string line;
  340. while (std::getline(ss, line)) {
  341. if (line.empty()) { // last line
  342. continue;
  343. }
  344. size_t sym_end = line.find(' ');
  345. if (sym_end == std::string::npos) {
  346. fprintf(stderr, "Couldn't parse llvm-nm output line: %s\n",
  347. line.c_str());
  348. return false;
  349. }
  350. if (line.size() < sym_end + 1) {
  351. fprintf(stderr, "Couldn't parse llvm-nm output line: %s\n",
  352. line.c_str());
  353. return false;
  354. }
  355. const char sym_type = line[sym_end + 1];
  356. line.resize(sym_end);
  357. switch (sym_type) {
  358. case 'D':
  359. dataSymbols.insert(line);
  360. break;
  361. case 'T':
  362. symbols.insert(line);
  363. break;
  364. }
  365. }
  366. return true;
  367. }
  368. static bool DumpFile(std::string const& nmPath, const char* filename,
  369. std::set<std::string>& symbols,
  370. std::set<std::string>& dataSymbols)
  371. {
  372. #ifndef _WIN32
  373. return DumpFileWithLlvmNm(nmPath, filename, symbols, dataSymbols);
  374. #else
  375. HANDLE hFile;
  376. HANDLE hFileMapping;
  377. LPVOID lpFileBase;
  378. hFile = CreateFileW(cmsys::Encoding::ToWide(filename).c_str(), GENERIC_READ,
  379. FILE_SHARE_READ, nullptr, OPEN_EXISTING,
  380. FILE_ATTRIBUTE_NORMAL, 0);
  381. if (hFile == INVALID_HANDLE_VALUE) {
  382. fprintf(stderr, "Couldn't open file '%s' with CreateFile()\n", filename);
  383. return false;
  384. }
  385. hFileMapping =
  386. CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, 0, nullptr);
  387. if (hFileMapping == 0) {
  388. CloseHandle(hFile);
  389. fprintf(stderr, "Couldn't open file mapping with CreateFileMapping()\n");
  390. return false;
  391. }
  392. lpFileBase = MapViewOfFile(hFileMapping, FILE_MAP_READ, 0, 0, 0);
  393. if (lpFileBase == 0) {
  394. CloseHandle(hFileMapping);
  395. CloseHandle(hFile);
  396. fprintf(stderr, "Couldn't map view of file with MapViewOfFile()\n");
  397. return false;
  398. }
  399. const PIMAGE_DOS_HEADER dosHeader = (PIMAGE_DOS_HEADER)lpFileBase;
  400. if (dosHeader->e_magic == IMAGE_DOS_SIGNATURE) {
  401. fprintf(stderr, "File is an executable. I don't dump those.\n");
  402. return false;
  403. } else {
  404. const PIMAGE_FILE_HEADER imageHeader = (PIMAGE_FILE_HEADER)lpFileBase;
  405. /* Does it look like a COFF OBJ file??? */
  406. if (((imageHeader->Machine == IMAGE_FILE_MACHINE_I386) ||
  407. (imageHeader->Machine == IMAGE_FILE_MACHINE_AMD64) ||
  408. (imageHeader->Machine == IMAGE_FILE_MACHINE_ARM) ||
  409. (imageHeader->Machine == IMAGE_FILE_MACHINE_ARMNT) ||
  410. (imageHeader->Machine == IMAGE_FILE_MACHINE_ARM64) ||
  411. (imageHeader->Machine == IMAGE_FILE_MACHINE_ARM64EC)) &&
  412. (imageHeader->Characteristics == 0)) {
  413. /*
  414. * The tests above are checking for IMAGE_FILE_HEADER.Machine
  415. * if it contains supported machine formats (currently ARM and x86)
  416. * and IMAGE_FILE_HEADER.Characteristics == 0 indicating that
  417. * this is not linked COFF OBJ file;
  418. */
  419. DumpSymbols<IMAGE_FILE_HEADER, IMAGE_SYMBOL> symbolDumper(
  420. (PIMAGE_FILE_HEADER)lpFileBase, symbols, dataSymbols,
  421. (imageHeader->Machine == IMAGE_FILE_MACHINE_I386
  422. ? Arch::I386
  423. : (imageHeader->Machine == IMAGE_FILE_MACHINE_ARM64EC
  424. ? Arch::ARM64EC
  425. : Arch::Generic)));
  426. symbolDumper.DumpObjFile();
  427. } else {
  428. // check for /bigobj and llvm LTO format
  429. cmANON_OBJECT_HEADER_BIGOBJ* h =
  430. (cmANON_OBJECT_HEADER_BIGOBJ*)lpFileBase;
  431. if (h->Sig1 == 0x0 && h->Sig2 == 0xffff) {
  432. // bigobj
  433. DumpSymbols<cmANON_OBJECT_HEADER_BIGOBJ, cmIMAGE_SYMBOL_EX>
  434. symbolDumper(
  435. (cmANON_OBJECT_HEADER_BIGOBJ*)lpFileBase, symbols, dataSymbols,
  436. (h->Machine == IMAGE_FILE_MACHINE_I386
  437. ? Arch::I386
  438. : (h->Machine == IMAGE_FILE_MACHINE_ARM64EC ? Arch::ARM64EC
  439. : Arch::Generic)));
  440. symbolDumper.DumpObjFile();
  441. } else if (
  442. // BCexCODE - llvm bitcode
  443. (h->Sig1 == 0x4342 && h->Sig2 == 0xDEC0) ||
  444. // 0x0B17C0DE - llvm bitcode BC wrapper
  445. (h->Sig1 == 0x0B17 && h->Sig2 == 0xC0DE)) {
  446. return DumpFileWithLlvmNm(nmPath, filename, symbols, dataSymbols);
  447. } else {
  448. printf("unrecognized file format in '%s, %u'\n", filename,
  449. imageHeader->Machine);
  450. return false;
  451. }
  452. }
  453. }
  454. UnmapViewOfFile(lpFileBase);
  455. CloseHandle(hFileMapping);
  456. CloseHandle(hFile);
  457. return true;
  458. #endif
  459. }
  460. bool bindexplib::AddObjectFile(const char* filename)
  461. {
  462. return DumpFile(this->NmPath, filename, this->Symbols, this->DataSymbols);
  463. }
  464. bool bindexplib::AddDefinitionFile(const char* filename)
  465. {
  466. cmsys::ifstream infile(filename);
  467. if (!infile) {
  468. fprintf(stderr, "Couldn't open definition file '%s'\n", filename);
  469. return false;
  470. }
  471. std::string str;
  472. while (std::getline(infile, str)) {
  473. // skip the LIBRARY and EXPORTS lines (if any)
  474. if ((str.compare(0, 7, "LIBRARY") == 0) ||
  475. (str.compare(0, 7, "EXPORTS") == 0)) {
  476. continue;
  477. }
  478. // remove leading tabs & spaces
  479. str.erase(0, str.find_first_not_of(" \t"));
  480. std::size_t found = str.find(" \t DATA");
  481. if (found != std::string::npos) {
  482. str.erase(found, std::string::npos);
  483. this->DataSymbols.insert(str);
  484. } else {
  485. this->Symbols.insert(str);
  486. }
  487. }
  488. infile.close();
  489. return true;
  490. }
  491. void bindexplib::WriteFile(FILE* file)
  492. {
  493. fprintf(file, "EXPORTS \n");
  494. for (std::string const& ds : this->DataSymbols) {
  495. fprintf(file, "\t%s \t DATA\n", ds.c_str());
  496. }
  497. for (std::string const& s : this->Symbols) {
  498. fprintf(file, "\t%s\n", s.c_str());
  499. }
  500. }
  501. void bindexplib::SetNmPath(std::string const& nm)
  502. {
  503. this->NmPath = nm;
  504. }