diff options
author | David Kalnischkies <david@kalnischkies.de> | 2014-06-20 21:33:56 +0200 |
---|---|---|
committer | David Kalnischkies <david@kalnischkies.de> | 2014-09-27 00:09:35 +0200 |
commit | 78a5476f3177a2a74ae51a1878c26ca322a25003 (patch) | |
tree | ce7572f671deaf5e07e01fff64432daaf97e4bd5 /apt-pkg | |
parent | a3a91fd7bab34983a008854728baf26034d4033e (diff) |
drop stored StringItems in favor of in-memory mappings
Strings like Section names or architectures are needed vary often.
Instead of writing them each time we need them, we deploy sharing for
these special strings. Until now, this was done with a linked list of
strings in which we would search, which was stored in the cache.
It turns out we can do this just as well in memory as well with a bunch
of std::map's.
In memory means here that it isn't available anymore if we have a partly
invalid cache, but that isn't much of a problem in practice as the
status file is compared to the other files we parse very small and includes
mostly duplicates, so the space we would gain by storing is more or less
equal to the size of the stored linked list…
Diffstat (limited to 'apt-pkg')
-rw-r--r-- | apt-pkg/deb/debindexfile.cc | 2 | ||||
-rw-r--r-- | apt-pkg/deb/deblistparser.cc | 45 | ||||
-rw-r--r-- | apt-pkg/deb/deblistparser.h | 1 | ||||
-rw-r--r-- | apt-pkg/edsp/edspindexfile.cc | 2 | ||||
-rw-r--r-- | apt-pkg/pkgcache.cc | 2 | ||||
-rw-r--r-- | apt-pkg/pkgcache.h | 30 | ||||
-rw-r--r-- | apt-pkg/pkgcachegen.cc | 81 | ||||
-rw-r--r-- | apt-pkg/pkgcachegen.h | 18 |
8 files changed, 57 insertions, 124 deletions
diff --git a/apt-pkg/deb/debindexfile.cc b/apt-pkg/deb/debindexfile.cc index feda8d0d8..c35a89ade 100644 --- a/apt-pkg/deb/debindexfile.cc +++ b/apt-pkg/deb/debindexfile.cc @@ -619,7 +619,7 @@ bool debStatusIndex::Merge(pkgCacheGenerator &Gen,OpProgress *Prog) const pkgCache::PkgFileIterator CFile = Gen.GetCurFile(); CFile->Size = Pkg.FileSize(); CFile->mtime = Pkg.ModificationTime(); - map_stringitem_t const storage = Gen.WriteUniqString("now"); + map_stringitem_t const storage = Gen.StoreString(pkgCacheGenerator::MIXED, "now"); CFile->Archive = storage; if (Gen.MergeList(Parser) == false) diff --git a/apt-pkg/deb/deblistparser.cc b/apt-pkg/deb/deblistparser.cc index 3e0d3a791..9919d2036 100644 --- a/apt-pkg/deb/deblistparser.cc +++ b/apt-pkg/deb/deblistparser.cc @@ -58,18 +58,6 @@ debListParser::debListParser(FileFd *File, string const &Arch) : Tags(File), MultiArchEnabled = Architectures.size() > 1; } /*}}}*/ -// ListParser::UniqFindTagWrite - Find the tag and write a unq string /*{{{*/ -// --------------------------------------------------------------------- -/* */ -map_stringitem_t debListParser::UniqFindTagWrite(const char *Tag) -{ - const char *Start; - const char *Stop; - if (Section.Find(Tag,Start,Stop) == false) - return 0; - return WriteUniqString(Start,Stop - Start); -} - /*}}}*/ // ListParser::Package - Return the package name /*{{{*/ // --------------------------------------------------------------------- /* This is to return the name of the package this section describes */ @@ -144,9 +132,16 @@ unsigned char debListParser::ParseMultiArch(bool const showErrors) /*{{{*/ /* */ bool debListParser::NewVersion(pkgCache::VerIterator &Ver) { + const char *Start; + const char *Stop; + // Parse the section - unsigned long const idxSection = UniqFindTagWrite("Section"); - Ver->Section = idxSection; + if (Section.Find("Section",Start,Stop) == true) + { + map_stringitem_t const idx = StoreString(pkgCacheGenerator::SECTION, Start, Stop - Start); + Ver->Section = idx; + } + Ver->MultiArch = ParseMultiArch(true); // Archive Size Ver->Size = Section.FindULL("Size"); @@ -155,10 +150,8 @@ bool debListParser::NewVersion(pkgCache::VerIterator &Ver) Ver->InstalledSize *= 1024; // Priority - const char *Start; - const char *Stop; if (Section.Find("Priority",Start,Stop) == true) - { + { if (GrabWord(string(Start,Stop-Start),PrioList,Ver->Priority) == false) Ver->Priority = pkgCache::State::Extra; } @@ -891,7 +884,7 @@ bool debListParser::LoadReleaseInfo(pkgCache::PkgFileIterator &FileI, { // apt-secure does no longer download individual (per-section) Release // file. to provide Component pinning we use the section name now - map_stringitem_t const storage = WriteUniqString(component); + map_stringitem_t const storage = StoreString(pkgCacheGenerator::MIXED, component); FileI->Component = storage; pkgTagFile TagFile(&File, File.Size()); @@ -900,19 +893,19 @@ bool debListParser::LoadReleaseInfo(pkgCache::PkgFileIterator &FileI, return false; std::string data; - #define APT_INRELEASE(TAG, STORE) \ + #define APT_INRELEASE(TYPE, TAG, STORE) \ data = Section.FindS(TAG); \ if (data.empty() == false) \ { \ - map_stringitem_t const storage = WriteUniqString(data); \ + map_stringitem_t const storage = StoreString(pkgCacheGenerator::TYPE, data); \ STORE = storage; \ } - APT_INRELEASE("Suite", FileI->Archive) - APT_INRELEASE("Component", FileI->Component) - APT_INRELEASE("Version", FileI->Version) - APT_INRELEASE("Origin", FileI->Origin) - APT_INRELEASE("Codename", FileI->Codename) - APT_INRELEASE("Label", FileI->Label) + APT_INRELEASE(MIXED, "Suite", FileI->Archive) + APT_INRELEASE(MIXED, "Component", FileI->Component) + APT_INRELEASE(VERSION, "Version", FileI->Version) + APT_INRELEASE(MIXED, "Origin", FileI->Origin) + APT_INRELEASE(MIXED, "Codename", FileI->Codename) + APT_INRELEASE(MIXED, "Label", FileI->Label) #undef APT_INRELEASE Section.FindFlag("NotAutomatic", FileI->Flags, pkgCache::Flag::NotAutomatic); Section.FindFlag("ButAutomaticUpgrades", FileI->Flags, pkgCache::Flag::ButAutomaticUpgrades); diff --git a/apt-pkg/deb/deblistparser.h b/apt-pkg/deb/deblistparser.h index f5ac47e1e..b55e57d41 100644 --- a/apt-pkg/deb/deblistparser.h +++ b/apt-pkg/deb/deblistparser.h @@ -49,7 +49,6 @@ class debListParser : public pkgCacheGenerator::ListParser std::vector<std::string> Architectures; bool MultiArchEnabled; - map_stringitem_t UniqFindTagWrite(const char *Tag); virtual bool ParseStatus(pkgCache::PkgIterator &Pkg,pkgCache::VerIterator &Ver); bool ParseDepends(pkgCache::VerIterator &Ver,const char *Tag, unsigned int Type); diff --git a/apt-pkg/edsp/edspindexfile.cc b/apt-pkg/edsp/edspindexfile.cc index e013dd44c..c38f24567 100644 --- a/apt-pkg/edsp/edspindexfile.cc +++ b/apt-pkg/edsp/edspindexfile.cc @@ -56,7 +56,7 @@ bool edspIndex::Merge(pkgCacheGenerator &Gen,OpProgress *Prog) const pkgCache::PkgFileIterator CFile = Gen.GetCurFile(); CFile->Size = Pkg.FileSize(); CFile->mtime = Pkg.ModificationTime(); - map_stringitem_t const storage = Gen.WriteUniqString("edsp::scenario"); + map_stringitem_t const storage = Gen.StoreString(pkgCacheGenerator::MIXED, "edsp::scenario"); CFile->Archive = storage; if (Gen.MergeList(Parser) == false) diff --git a/apt-pkg/pkgcache.cc b/apt-pkg/pkgcache.cc index 4b4631a65..4ab9e2a4b 100644 --- a/apt-pkg/pkgcache.cc +++ b/apt-pkg/pkgcache.cc @@ -82,7 +82,6 @@ pkgCache::Header::Header() MaxDescFileSize = 0; FileList = 0; - StringList = 0; VerSysName = 0; Architecture = 0; HashTableSize = _config->FindI("APT::Cache-HashTableSize", 10 * 1048); @@ -140,7 +139,6 @@ bool pkgCache::ReMap(bool const &Errorchecks) DescP = (Description *)Map.Data(); ProvideP = (Provides *)Map.Data(); DepP = (Dependency *)Map.Data(); - StringItemP = (StringItem *)Map.Data(); StrP = (char *)Map.Data(); if (Errorchecks == false) diff --git a/apt-pkg/pkgcache.h b/apt-pkg/pkgcache.h index 8179e05aa..4284a318c 100644 --- a/apt-pkg/pkgcache.h +++ b/apt-pkg/pkgcache.h @@ -109,7 +109,6 @@ class pkgCache /*{{{*/ struct Description; struct Provides; struct Dependency; - struct StringItem; struct VerFile; struct DescFile; @@ -186,7 +185,6 @@ class pkgCache /*{{{*/ Description *DescP; Provides *ProvideP; Dependency *DepP; - StringItem *StringItemP; char *StrP; virtual bool ReMap(bool const &Errorchecks = true); @@ -290,12 +288,6 @@ struct pkgCache::Header The PackageFile structures are singly linked lists that represent all package files that have been merged into the cache. */ map_pointer_t FileList; - /** \brief index of the first StringItem structure - - The cache contains a list of all the unique strings (StringItems). - The parser reads this list into memory so it can match strings - against it.*/ - map_pointer_t StringList; /** \brief String representing the version system used */ map_pointer_t VerSysName; /** \brief native architecture the cache was built against */ @@ -438,7 +430,7 @@ struct pkgCache::Package struct pkgCache::PackageFile { /** \brief physical disk file that this PackageFile represents */ - map_pointer_t FileName; // StringItem + map_stringitem_t FileName; /** \brief the release information Please see the files document for a description of what the @@ -606,7 +598,7 @@ struct pkgCache::Description struct pkgCache::Dependency { /** \brief string of the version the dependency is applied against */ - map_stringitem_t Version; // StringItem + map_stringitem_t Version; /** \brief index of the package this depends applies to The generator will - if the package does not already exist - @@ -656,24 +648,6 @@ struct pkgCache::Provides map_pointer_t NextPkgProv; // Provides }; /*}}}*/ -// StringItem structure /*{{{*/ -/** \brief used for generating single instances of strings - - Some things like Section Name are are useful to have as unique tags. - It is part of a linked list based at pkgCache::Header::StringList - - All strings are simply inlined any place in the file that is natural - for the writer. The client should make no assumptions about the positioning - of strings. All StringItems should be null-terminated. */ -struct pkgCache::StringItem -{ - /** \brief string this refers to */ - map_stringitem_t String; - /** \brief Next link in the chain */ - map_stringitem_t NextItem; -}; - /*}}}*/ - inline char const * pkgCache::NativeArch() { return StrP + HeaderP->Architecture; } diff --git a/apt-pkg/pkgcachegen.cc b/apt-pkg/pkgcachegen.cc index 359dffc9d..f7480bc77 100644 --- a/apt-pkg/pkgcachegen.cc +++ b/apt-pkg/pkgcachegen.cc @@ -57,8 +57,7 @@ pkgCacheGenerator::pkgCacheGenerator(DynamicMMap *pMap,OpProgress *Prog) : FoundFileDeps(0) { CurrentFile = 0; - memset(UniqHash,0,sizeof(UniqHash)); - + if (_error->PendingError() == true) return; @@ -82,9 +81,7 @@ pkgCacheGenerator::pkgCacheGenerator(DynamicMMap *pMap,OpProgress *Prog) : if (unlikely(idxVerSysName == 0)) return; Cache.HeaderP->VerSysName = idxVerSysName; - // this pointer is set in ReMap, but we need it now for WriteUniqString - Cache.StringItemP = (pkgCache::StringItem *)Map.Data(); - map_stringitem_t const idxArchitecture = WriteUniqString(_config->Find("APT::Architecture")); + map_stringitem_t const idxArchitecture = StoreString(MIXED, _config->Find("APT::Architecture")); if (unlikely(idxArchitecture == 0)) return; Cache.HeaderP->Architecture = idxArchitecture; @@ -149,10 +146,6 @@ void pkgCacheGenerator::ReMap(void const * const oldMap, void const * const newM CurrentFile += (pkgCache::PackageFile const * const) newMap - (pkgCache::PackageFile const * const) oldMap; - for (size_t i = 0; i < _count(UniqHash); ++i) - if (UniqHash[i] != 0) - UniqHash[i] += (pkgCache::StringItem const * const) newMap - (pkgCache::StringItem const * const) oldMap; - for (std::vector<pkgCache::GrpIterator*>::const_iterator i = Dynamic<pkgCache::GrpIterator>::toReMap.begin(); i != Dynamic<pkgCache::GrpIterator>::toReMap.end(); ++i) (*i)->ReMap(oldMap, newMap); @@ -685,7 +678,7 @@ bool pkgCacheGenerator::NewPackage(pkgCache::PkgIterator &Pkg,const string &Name #endif Pkg->Group = Grp.Index(); // all is mapped to the native architecture - map_stringitem_t const idxArch = (Arch == "all") ? Cache.HeaderP->Architecture : WriteUniqString(Arch.c_str()); + map_stringitem_t const idxArch = (Arch == "all") ? Cache.HeaderP->Architecture : StoreString(MIXED, Arch); if (unlikely(idxArch == 0)) return false; Pkg->Arch = idxArch; @@ -900,7 +893,7 @@ map_pointer_t pkgCacheGenerator::NewDescription(pkgCache::DescIterator &Desc, // Fill it in Desc = pkgCache::DescIterator(Cache,Cache.DescP + Description); Desc->ID = Cache.HeaderP->DescriptionCount++; - map_stringitem_t const idxlanguage_code = WriteUniqString(Lang); + map_stringitem_t const idxlanguage_code = StoreString(MIXED, Lang); if (unlikely(idxlanguage_code == 0)) return 0; Desc->language_code = idxlanguage_code; @@ -1102,7 +1095,7 @@ bool pkgCacheGenerator::SelectFile(const string &File,const string &Site, // Fill it in map_stringitem_t const idxFileName = WriteStringInMap(File); - map_stringitem_t const idxSite = WriteUniqString(Site); + map_stringitem_t const idxSite = StoreString(MIXED, Site); if (unlikely(idxFileName == 0 || idxSite == 0)) return false; CurrentFile->FileName = idxFileName; @@ -1110,7 +1103,7 @@ bool pkgCacheGenerator::SelectFile(const string &File,const string &Site, CurrentFile->NextFile = Cache.HeaderP->FileList; CurrentFile->Flags = Flags; CurrentFile->ID = Cache.HeaderP->PackageFileCount; - map_stringitem_t const idxIndexType = WriteUniqString(Index.GetType()->Label); + map_stringitem_t const idxIndexType = StoreString(MIXED, Index.GetType()->Label); if (unlikely(idxIndexType == 0)) return false; CurrentFile->IndexType = idxIndexType; @@ -1127,57 +1120,27 @@ bool pkgCacheGenerator::SelectFile(const string &File,const string &Site, // --------------------------------------------------------------------- /* This is used to create handles to strings. Given the same text it always returns the same number */ -map_stringitem_t pkgCacheGenerator::WriteUniqString(const char *S, +map_stringitem_t pkgCacheGenerator::StoreString(enum StringType const type, const char *S, unsigned int Size) { - /* We use a very small transient hash table here, this speeds up generation - by a fair amount on slower machines */ - pkgCache::StringItem *&Bucket = UniqHash[(S[0]*5 + S[1]) % _count(UniqHash)]; - if (Bucket != 0 && - stringcmp(S,S+Size,Cache.StrP + Bucket->String) == 0) - return Bucket->String; - - // Search for an insertion point - pkgCache::StringItem *I = Cache.StringItemP + Cache.HeaderP->StringList; - int Res = 1; - map_stringitem_t *Last = &Cache.HeaderP->StringList; - for (; I != Cache.StringItemP; Last = &I->NextItem, - I = Cache.StringItemP + I->NextItem) - { - Res = stringcmp(S,S+Size,Cache.StrP + I->String); - if (Res >= 0) - break; + std::string const key(S, Size); + + std::map<std::string,map_stringitem_t> * strings; + switch(type) { + case MIXED: strings = &strMixed; break; + case PKGNAME: strings = &strPkgNames; break; + case VERSION: strings = &strVersions; break; + case SECTION: strings = &strSections; break; + default: _error->Fatal("Unknown enum type used for string storage of '%s'", key.c_str()); return 0; } - - // Match - if (Res == 0) - { - Bucket = I; - return I->String; - } - - // Get a structure - void const * const oldMap = Map.Data(); - map_pointer_t const Item = AllocateInMap(sizeof(pkgCache::StringItem)); - if (Item == 0) - return 0; - - map_stringitem_t const idxString = WriteStringInMap(S,Size); - if (unlikely(idxString == 0)) - return 0; - if (oldMap != Map.Data()) { - Last += (map_pointer_t const * const) Map.Data() - (map_pointer_t const * const) oldMap; - I += (pkgCache::StringItem const * const) Map.Data() - (pkgCache::StringItem const * const) oldMap; - } - *Last = Item; - // Fill in the structure - pkgCache::StringItem *ItemP = Cache.StringItemP + Item; - ItemP->NextItem = I - Cache.StringItemP; - ItemP->String = idxString; + std::map<std::string,map_stringitem_t>::const_iterator const item = strings->find(key); + if (item != strings->end()) + return item->second; - Bucket = ItemP; - return ItemP->String; + map_stringitem_t const idxString = WriteStringInMap(S,Size); + strings->insert(std::make_pair(key, idxString)); + return idxString; } /*}}}*/ // CheckValidity - Check that a cache is up-to-date /*{{{*/ diff --git a/apt-pkg/pkgcachegen.h b/apt-pkg/pkgcachegen.h index 42da7b12d..84777b912 100644 --- a/apt-pkg/pkgcachegen.h +++ b/apt-pkg/pkgcachegen.h @@ -27,6 +27,7 @@ #include <vector> #include <string> +#include <map> class FileFd; class pkgSourceList; @@ -36,13 +37,16 @@ class pkgIndexFile; class pkgCacheGenerator /*{{{*/ { private: - - pkgCache::StringItem *UniqHash[26]; APT_HIDDEN map_stringitem_t WriteStringInMap(std::string const &String) { return WriteStringInMap(String.c_str()); }; APT_HIDDEN map_stringitem_t WriteStringInMap(const char *String); APT_HIDDEN map_stringitem_t WriteStringInMap(const char *String, const unsigned long &Len); APT_HIDDEN map_pointer_t AllocateInMap(const unsigned long &size); + std::map<std::string,map_stringitem_t> strMixed; + std::map<std::string,map_stringitem_t> strSections; + std::map<std::string,map_stringitem_t> strPkgNames; + std::map<std::string,map_stringitem_t> strVersions; + public: class ListParser; @@ -91,8 +95,9 @@ class pkgCacheGenerator /*{{{*/ public: - map_stringitem_t WriteUniqString(const char *S,unsigned int const Size); - inline map_stringitem_t WriteUniqString(const std::string &S) {return WriteUniqString(S.c_str(),S.length());}; + enum StringType { MIXED, PKGNAME, VERSION, SECTION }; + map_stringitem_t StoreString(enum StringType const type, const char * S, unsigned int const Size); + inline map_stringitem_t StoreString(enum StringType const type, const std::string &S) {return StoreString(type, S.c_str(),S.length());}; void DropProgress() {Progress = 0;}; bool SelectFile(const std::string &File,const std::string &Site,pkgIndexFile const &Index, @@ -145,8 +150,9 @@ class pkgCacheGenerator::ListParser protected: - inline map_stringitem_t WriteUniqString(std::string S) {return Owner->WriteUniqString(S);}; - inline map_stringitem_t WriteUniqString(const char *S,unsigned int Size) {return Owner->WriteUniqString(S,Size);}; + inline map_stringitem_t StoreString(pkgCacheGenerator::StringType const type, std::string const &S) {return Owner->StoreString(type, S);}; + inline map_stringitem_t StoreString(pkgCacheGenerator::StringType const type, const char *S,unsigned int Size) {return Owner->StoreString(type, S, Size);}; + inline map_stringitem_t WriteString(const std::string &S) {return Owner->WriteStringInMap(S);}; inline map_stringitem_t WriteString(const char *S,unsigned int Size) {return Owner->WriteStringInMap(S,Size);}; bool NewDepends(pkgCache::VerIterator &Ver,const std::string &Package, const std::string &Arch, |