//===- SymbolTable.cpp ----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "SymbolTable.h" #include "Config.h" #include "Driver.h" #include "LTO.h" #include "PDB.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; namespace lld { namespace coff { static Timer ltoTimer("LTO", Timer::root()); SymbolTable *symtab; void SymbolTable::addFile(InputFile *file) { log("Reading " + toString(file)); file->parse(); MachineTypes mt = file->getMachineType(); if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) { config->machine = mt; } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && config->machine != mt) { error(toString(file) + ": machine type " + machineToStr(mt) + " conflicts with " + machineToStr(config->machine)); return; } if (auto *f = dyn_cast(file)) { ObjFile::instances.push_back(f); } else if (auto *f = dyn_cast(file)) { BitcodeFile::instances.push_back(f); } else if (auto *f = dyn_cast(file)) { ImportFile::instances.push_back(f); } driver->parseDirectives(file); } static void errorOrWarn(const Twine &s) { if (config->forceUnresolved) warn(s); else error(s); } // Returns the symbol in SC whose value is <= Addr that is closest to Addr. // This is generally the global variable or function whose definition contains // Addr. static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { DefinedRegular *candidate = nullptr; for (Symbol *s : sc->file->getSymbols()) { auto *d = dyn_cast_or_null(s); if (!d || d->getChunk() != sc || d->getValue() > addr || (candidate && d->getValue() < candidate->getValue())) continue; candidate = d; } return candidate; } // Given a file and the index of a symbol in that file, returns a description // of all references to that symbol from that file. If no debug information is // available, returns just the name of the file, else one string per actual // reference as described in the debug info. std::vector getSymbolLocations(ObjFile *file, uint32_t symIndex) { struct Location { Symbol *sym; std::pair fileLine; }; std::vector locations; for (Chunk *c : file->getChunks()) { auto *sc = dyn_cast(c); if (!sc) continue; for (const coff_relocation &r : sc->getRelocs()) { if (r.SymbolTableIndex != symIndex) continue; std::pair fileLine = getFileLine(sc, r.VirtualAddress); Symbol *sym = getSymbol(sc, r.VirtualAddress); if (!fileLine.first.empty() || sym) locations.push_back({sym, fileLine}); } } if (locations.empty()) return std::vector({"\n>>> referenced by " + toString(file)}); std::vector symbolLocations(locations.size()); size_t i = 0; for (Location loc : locations) { llvm::raw_string_ostream os(symbolLocations[i++]); os << "\n>>> referenced by "; if (!loc.fileLine.first.empty()) os << loc.fileLine.first << ":" << loc.fileLine.second << "\n>>> "; os << toString(file); if (loc.sym) os << ":(" << toString(*loc.sym) << ')'; } return symbolLocations; } // For an undefined symbol, stores all files referencing it and the index of // the undefined symbol in each file. struct UndefinedDiag { Symbol *sym; struct File { ObjFile *oFile; uint64_t symIndex; }; std::vector files; }; static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) { std::string out; llvm::raw_string_ostream os(out); os << "undefined symbol: " << toString(*undefDiag.sym); const size_t maxUndefReferences = 10; size_t i = 0, numRefs = 0; for (const UndefinedDiag::File &ref : undefDiag.files) { std::vector symbolLocations = getSymbolLocations(ref.oFile, ref.symIndex); numRefs += symbolLocations.size(); for (const std::string &s : symbolLocations) { if (i >= maxUndefReferences) break; os << s; i++; } } if (i < numRefs) os << "\n>>> referenced " << numRefs - i << " more times"; errorOrWarn(os.str()); } void SymbolTable::loadMinGWAutomaticImports() { for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef) continue; if (!sym->isUsedInRegularObj) continue; StringRef name = undef->getName(); if (name.startswith("__imp_")) continue; // If we have an undefined symbol, but we have a Lazy representing a // symbol we could load from file, make sure to load that. Lazy *l = dyn_cast_or_null(find(("__imp_" + name).str())); if (!l || l->pendingArchiveLoad) continue; log("Loading lazy " + l->getName() + " from " + l->file->getName() + " for automatic import"); l->pendingArchiveLoad = true; l->file->addMember(&l->sym); } } bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { if (name.startswith("__imp_")) return false; Defined *imp = dyn_cast_or_null(find(("__imp_" + name).str())); if (!imp) return false; // Replace the reference directly to a variable with a reference // to the import address table instead. This obviously isn't right, // but we mark the symbol as IsRuntimePseudoReloc, and a later pass // will add runtime pseudo relocations for every relocation against // this Symbol. The runtime pseudo relocation framework expects the // reference itself to point at the IAT entry. size_t impSize = 0; if (isa(imp)) { log("Automatically importing " + name + " from " + cast(imp)->getDLLName()); impSize = sizeof(DefinedImportData); } else if (isa(imp)) { log("Automatically importing " + name + " from " + toString(cast(imp)->file)); impSize = sizeof(DefinedRegular); } else { warn("unable to automatically import " + name + " from " + imp->getName() + " from " + toString(cast(imp)->file) + "; unexpected symbol type"); return false; } sym->replaceKeepingName(imp, impSize); sym->isRuntimePseudoReloc = true; // There may exist symbols named .refptr. which only consist // of a single pointer to . If it turns out is // automatically imported, we don't need to keep the .refptr. // pointer at all, but redirect all accesses to it to the IAT entry // for __imp_ instead, and drop the whole .refptr. chunk. DefinedRegular *refptr = dyn_cast_or_null(find((".refptr." + name).str())); if (refptr && refptr->getChunk()->getSize() == config->wordsize) { SectionChunk *sc = dyn_cast_or_null(refptr->getChunk()); if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) { log("Replacing .refptr." + name + " with " + imp->getName()); refptr->getChunk()->live = false; refptr->replaceKeepingName(imp, impSize); } } return true; } void SymbolTable::reportRemainingUndefines() { SmallPtrSet undefs; DenseMap localImports; for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef) continue; if (!sym->isUsedInRegularObj) continue; StringRef name = undef->getName(); // A weak alias may have been resolved, so check for that. if (Defined *d = undef->getWeakAlias()) { // We want to replace Sym with D. However, we can't just blindly // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an // internal symbol, and internal symbols are stored as "unparented" // Symbols. For that reason we need to check which type of symbol we // are dealing with and copy the correct number of bytes. if (isa(d)) memcpy(sym, d, sizeof(DefinedRegular)); else if (isa(d)) memcpy(sym, d, sizeof(DefinedAbsolute)); else memcpy(sym, d, sizeof(SymbolUnion)); continue; } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. if (name.startswith("__imp_")) { Symbol *imp = find(name.substr(strlen("__imp_"))); if (imp && isa(imp)) { auto *d = cast(imp); replaceSymbol(sym, name, d); localImportChunks.push_back(cast(sym)->getChunk()); localImports[sym] = d; continue; } } // We don't want to report missing Microsoft precompiled headers symbols. // A proper message will be emitted instead in PDBLinker::aquirePrecompObj if (name.contains("_PchSym_")) continue; if (config->mingw && handleMinGWAutomaticImport(sym, name)) continue; // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. if (config->forceUnresolved) replaceSymbol(sym, name, 0); undefs.insert(sym); } if (undefs.empty() && localImports.empty()) return; for (Symbol *b : config->gcroot) { if (undefs.count(b)) errorOrWarn(": undefined symbol: " + toString(*b)); if (config->warnLocallyDefinedImported) if (Symbol *imp = localImports.lookup(b)) warn(": locally defined symbol imported: " + toString(*imp) + " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); } std::vector undefDiags; DenseMap firstDiag; for (ObjFile *file : ObjFile::instances) { size_t symIndex = (size_t)-1; for (Symbol *sym : file->getSymbols()) { ++symIndex; if (!sym) continue; if (undefs.count(sym)) { auto it = firstDiag.find(sym); if (it == firstDiag.end()) { firstDiag[sym] = undefDiags.size(); undefDiags.push_back({sym, {{file, symIndex}}}); } else { undefDiags[it->second].files.push_back({file, symIndex}); } } if (config->warnLocallyDefinedImported) if (Symbol *imp = localImports.lookup(sym)) warn(toString(file) + ": locally defined symbol imported: " + toString(*imp) + " (defined in " + toString(imp->getFile()) + ") [LNK4217]"); } } for (const UndefinedDiag& undefDiag : undefDiags) reportUndefinedSymbol(undefDiag); } std::pair SymbolTable::insert(StringRef name) { bool inserted = false; Symbol *&sym = symMap[CachedHashStringRef(name)]; if (!sym) { sym = reinterpret_cast(make()); sym->isUsedInRegularObj = false; sym->pendingArchiveLoad = false; inserted = true; } return {sym, inserted}; } std::pair SymbolTable::insert(StringRef name, InputFile *file) { std::pair result = insert(name); if (!file || !isa(file)) result.first->isUsedInRegularObj = true; return result; } Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f, bool isWeakAlias) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name, f); if (wasInserted || (isa(s) && isWeakAlias)) { replaceSymbol(s, name); return s; } if (auto *l = dyn_cast(s)) { if (!s->pendingArchiveLoad) { s->pendingArchiveLoad = true; l->file->addMember(&l->sym); } } return s; } void SymbolTable::addLazy(ArchiveFile *f, const Archive::Symbol sym) { StringRef name = sym.getName(); Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); if (wasInserted) { replaceSymbol(s, f, sym); return; } auto *u = dyn_cast(s); if (!u || u->weakAlias || s->pendingArchiveLoad) return; s->pendingArchiveLoad = true; f->addMember(&sym); } void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile) { std::string msg = "duplicate symbol: " + toString(*existing) + " in " + toString(existing->getFile()) + " and in " + toString(newFile); if (config->forceMultiple) warn(msg); else error(msg); } Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) replaceSymbol(s, n, sym); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) replaceSymbol(s, n, va); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) replaceSymbol(s, n, c); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addRegular(InputFile *f, StringRef n, const coff_symbol_generic *sym, SectionChunk *c) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) replaceSymbol(s, f, n, /*IsCOMDAT*/ false, /*IsExternal*/ true, sym, c); else reportDuplicate(s, f); return s; } std::pair SymbolTable::addComdat(InputFile *f, StringRef n, const coff_symbol_generic *sym) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) { replaceSymbol(s, f, n, /*IsCOMDAT*/ true, /*IsExternal*/ true, sym, nullptr); return {cast(s), true}; } auto *existingSymbol = cast(s); if (!existingSymbol->isCOMDAT) reportDuplicate(s, f); return {existingSymbol, false}; } Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size, const coff_symbol_generic *sym, CommonChunk *c) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, f); if (wasInserted || !isa(s)) replaceSymbol(s, f, n, size, sym, c); else if (auto *dc = dyn_cast(s)) if (size > dc->getSize()) replaceSymbol(s, f, n, size, sym, c); return s; } Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) { replaceSymbol(s, n, f); return s; } reportDuplicate(s, f); return nullptr; } Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id, uint16_t machine) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || isa(s)) { replaceSymbol(s, name, id, machine); return s; } reportDuplicate(s, id->file); return nullptr; } std::vector SymbolTable::getChunks() { std::vector res; for (ObjFile *file : ObjFile::instances) { ArrayRef v = file->getChunks(); res.insert(res.end(), v.begin(), v.end()); } return res; } Symbol *SymbolTable::find(StringRef name) { return symMap.lookup(CachedHashStringRef(name)); } Symbol *SymbolTable::findUnderscore(StringRef name) { if (config->machine == I386) return find(("_" + name).str()); return find(name); } // Return all symbols that start with Prefix, possibly ignoring the first // character of Prefix or the first character symbol. std::vector SymbolTable::getSymsWithPrefix(StringRef prefix) { std::vector syms; for (auto pair : symMap) { StringRef name = pair.first.val(); if (name.startswith(prefix) || name.startswith(prefix.drop_front()) || name.drop_front().startswith(prefix) || name.drop_front().startswith(prefix.drop_front())) { syms.push_back(pair.second); } } return syms; } Symbol *SymbolTable::findMangle(StringRef name) { if (Symbol *sym = find(name)) if (!isa(sym)) return sym; // Efficient fuzzy string lookup is impossible with a hash table, so iterate // the symbol table once and collect all possibly matching symbols into this // vector. Then compare each possibly matching symbol with each possible // mangling. std::vector syms = getSymsWithPrefix(name); auto findByPrefix = [&syms](const Twine &t) -> Symbol * { std::string prefix = t.str(); for (auto *s : syms) if (s->getName().startswith(prefix)) return s; return nullptr; }; // For non-x86, just look for C++ functions. if (config->machine != I386) return findByPrefix("?" + name + "@@Y"); if (!name.startswith("_")) return nullptr; // Search for x86 stdcall function. if (Symbol *s = findByPrefix(name + "@")) return s; // Search for x86 fastcall function. if (Symbol *s = findByPrefix("@" + name.substr(1) + "@")) return s; // Search for x86 vectorcall function. if (Symbol *s = findByPrefix(name.substr(1) + "@@")) return s; // Search for x86 C++ non-member function. return findByPrefix("?" + name.substr(1) + "@@Y"); } Symbol *SymbolTable::addUndefined(StringRef name) { return addUndefined(name, nullptr, false); } std::vector SymbolTable::compileBitcodeFiles() { lto.reset(new BitcodeCompiler); for (BitcodeFile *f : BitcodeFile::instances) lto->add(*f); return lto->compile(); } void SymbolTable::addCombinedLTOObjects() { if (BitcodeFile::instances.empty()) return; ScopedTimer t(ltoTimer); for (StringRef object : compileBitcodeFiles()) { auto *obj = make(MemoryBufferRef(object, "lto.tmp")); obj->parse(); ObjFile::instances.push_back(obj); } } } // namespace coff } // namespace lld