diff options
author | Arch Librarian <arch@canonical.com> | 2004-09-20 16:56:32 +0000 |
---|---|---|
committer | Arch Librarian <arch@canonical.com> | 2004-09-20 16:56:32 +0000 |
commit | b2e465d6d32d2dc884f58b94acb7e35f671a87fe (patch) | |
tree | 5928383b9bde7b0ba9812e6526ad746466e558f7 /ftparchive/contents.cc | |
parent | 00b47c98ca4a4349686a082eba6d77decbb03a4d (diff) |
Join with aliencode
Author: jgg
Date: 2001-02-20 07:03:16 GMT
Join with aliencode
Diffstat (limited to 'ftparchive/contents.cc')
-rw-r--r-- | ftparchive/contents.cc | 401 |
1 files changed, 401 insertions, 0 deletions
diff --git a/ftparchive/contents.cc b/ftparchive/contents.cc new file mode 100644 index 000000000..145a68781 --- /dev/null +++ b/ftparchive/contents.cc @@ -0,0 +1,401 @@ +// -*- mode: cpp; mode: fold -*- +// Description /*{{{*/ +// $Id: contents.cc,v 1.2 2001/02/20 07:03:18 jgg Exp $ +/* ###################################################################### + + contents - Archive contents generator + + The GenContents class is a back end for an archive contents generator. + It takes a list of per-deb file name and merges it into a memory + database of all previous output. This database is stored as a set + of binary trees linked across directories to form a tree of all files+dirs + given to it. The tree will also be sorted as it is built up thus + removing the massive sort time overhead. + + By breaking all the pathnames into components and storing them + separately a space savings is realized by not duplicating the string + over and over again. Ultimately this saving is sacrificed to storage of + the tree structure itself but the tree structure yields a speed gain + in the sorting and processing. Ultimately it takes about 5 seconds to + do 141000 nodes and about 5 meg of ram. + + The tree looks something like: + + usr/ + / \ / libslang + bin/ lib/ --> libc6 + / \ \ libfoo + games/ sbin/ + + The ---> is the DirDown link + + + ##################################################################### */ + /*}}}*/ +// Include Files /*{{{*/ +#include "contents.h" + +#include <apt-pkg/extracttar.h> +#include <apt-pkg/error.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <malloc.h> + /*}}}*/ + +// GenContents::~GenContents - Free allocated memory /*{{{*/ +// --------------------------------------------------------------------- +/* Since all our allocations are static big-block allocations all that is + needed is to free all of them. */ +GenContents::~GenContents() +{ + while (BlockList != 0) + { + BigBlock *Old = BlockList; + BlockList = Old->Next; + free(Old->Block); + delete Old; + } +} + /*}}}*/ +// GenContents::Mystrdup - Custom strdup /*{{{*/ +// --------------------------------------------------------------------- +/* This strdup also uses a large block allocator to eliminate glibc + overhead */ +char *GenContents::Mystrdup(const char *From) +{ + unsigned int Len = strlen(From) + 1; + if (StrLeft <= Len) + { + StrLeft = 4096*10; + StrPool = (char *)malloc(StrLeft); + + BigBlock *Block = new BigBlock; + Block->Block = StrPool; + Block->Next = BlockList; + BlockList = Block; + } + + memcpy(StrPool,From,Len); + StrLeft -= Len; + + char *Res = StrPool; + StrPool += Len; + return Res; +} + /*}}}*/ +// GenContents::Node::operator new - Big block allocator /*{{{*/ +// --------------------------------------------------------------------- +/* This eliminates glibc's malloc overhead by allocating large blocks and + having a continuous set of Nodes. This takes about 8 bytes off each nodes + space needs. Freeing is not supported. */ +void *GenContents::Node::operator new(size_t Amount,GenContents *Owner) +{ + if (Owner->NodeLeft == 0) + { + Owner->NodeLeft = 10000; + Owner->NodePool = (Node *)malloc(Amount*Owner->NodeLeft); + BigBlock *Block = new BigBlock; + Block->Block = Owner->NodePool; + Block->Next = Owner->BlockList; + Owner->BlockList = Block; + } + + Owner->NodeLeft--; + return Owner->NodePool++; +} + /*}}}*/ +// GenContents::Grab - Grab a new node representing Name under Top /*{{{*/ +// --------------------------------------------------------------------- +/* This grabs a new node representing the pathname component Name under + the node Top. The node is given the name Package. It is assumed that Name + is inside of top. If a duplicate already entered name is found then + a note is made on the Dup list and the previous in-tree node is returned. */ +GenContents::Node *GenContents::Grab(GenContents::Node *Top,const char *Name, + const char *Package) +{ + /* We drop down to the next dir level each call. This simplifies + the calling routine */ + if (Top->DirDown == 0) + { + Node *Item = new(this) Node; + Item->Path = Mystrdup(Name); + Item->Package = Package; + Top->DirDown = Item; + return Item; + } + Top = Top->DirDown; + + int Res; + while (1) + { + Res = strcmp(Name,Top->Path); + + // Collision! + if (Res == 0) + { + // See if this the the same package (multi-version dup) + if (Top->Package == Package || + strcasecmp(Top->Package,Package) == 0) + return Top; + + // Look for an already existing Dup + for (Node *I = Top->Dups; I != 0; I = I->Dups) + if (I->Package == Package || + strcasecmp(I->Package,Package) == 0) + return Top; + + // Add the dup in + Node *Item = new(this) Node; + Item->Path = Top->Path; + Item->Package = Package; + Item->Dups = Top->Dups; + Top->Dups = Item; + return Top; + } + + // Continue to traverse the tree + if (Res < 0) + { + if (Top->BTreeLeft == 0) + break; + Top = Top->BTreeLeft; + } + else + { + if (Top->BTreeRight == 0) + break; + Top = Top->BTreeRight; + } + } + + // The item was not found in the tree + Node *Item = new(this) Node; + Item->Path = Mystrdup(Name); + Item->Package = Package; + + // Link it into the tree + if (Res < 0) + { + Item->BTreeLeft = Top->BTreeLeft; + Top->BTreeLeft = Item; + } + else + { + Item->BTreeRight = Top->BTreeRight; + Top->BTreeRight = Item; + } + + return Item; +} + /*}}}*/ +// GenContents::Add - Add a path to the tree /*{{{*/ +// --------------------------------------------------------------------- +/* This takes a full pathname and adds it into the tree. We split the + pathname into directory fragments adding each one as we go. Technically + in output from tar this should result in hitting previous items. */ +void GenContents::Add(const char *Dir,const char *Package) +{ + Node *Root = &this->Root; + + // Drop leading slashes + while (*Dir == '/' && *Dir != 0) + Dir++; + + // Run over the string and grab out each bit up to and including a / + const char *Start = Dir; + const char *I = Dir; + while (*I != 0) + { + if (*I != '/' || I - Start <= 1) + { + I++; + continue; + } + I++; + + // Copy the path fragment over + char Tmp[1024]; + strncpy(Tmp,Start,I - Start); + Tmp[I - Start] = 0; + + // Grab a node for it + Root = Grab(Root,Tmp,Package); + + Start = I; + } + + // The final component if it does not have a trailing / + if (I - Start >= 1) + Root = Grab(Root,Start,Package); +} + /*}}}*/ +// GenContents::WriteSpace - Write a given number of white space chars /*{{{*/ +// --------------------------------------------------------------------- +/* We mod 8 it and write tabs where possible. */ +void GenContents::WriteSpace(FILE *Out,unsigned int Current,unsigned int Target) +{ + if (Target <= Current) + Target = Current + 1; + + /* Now we write tabs so long as the next tab stop would not pass + the target */ + for (; (Current/8 + 1)*8 < Target; Current = (Current/8 + 1)*8) + fputc('\t',Out); + + // Fill the last bit with spaces + for (; Current < Target; Current++) + fputc(' ',Out); +} + /*}}}*/ +// GenContents::Print - Display the tree /*{{{*/ +// --------------------------------------------------------------------- +/* This is the final result function. It takes the tree and recursively + calls itself and runs over each section of the tree printing out + the pathname and the hit packages. We use Buf to build the pathname + summed over all the directory parents of this node. */ +void GenContents::Print(FILE *Out) +{ + char Buffer[1024]; + DoPrint(Out,&Root,Buffer); +} +void GenContents::DoPrint(FILE *Out,GenContents::Node *Top, char *Buf) +{ + if (Top == 0) + return; + + // Go left + DoPrint(Out,Top->BTreeLeft,Buf); + + // Print the current dir location and then descend to lower dirs + char *OldEnd = Buf + strlen(Buf); + if (Top->Path != 0) + { + strcat(Buf,Top->Path); + + // Do not show the item if it is a directory with dups + if (Top->Path[strlen(Top->Path)-1] != '/' /*|| Top->Dups == 0*/) + { + fputs(Buf,Out); + WriteSpace(Out,strlen(Buf),60); + for (Node *I = Top; I != 0; I = I->Dups) + { + if (I != Top) + fputc(',',Out); + fputs(I->Package,Out); + } + fputc('\n',Out); + } + } + + // Go along the directory link + DoPrint(Out,Top->DirDown,Buf); + *OldEnd = 0; + + // Go right + DoPrint(Out,Top->BTreeRight,Buf); +} + /*}}}*/ + +// ContentsExtract::Read - Read the archive /*{{{*/ +// --------------------------------------------------------------------- +/* */ +bool ContentsExtract::Read(debDebFile &Deb) +{ + Reset(); + + // Get the archive member and positition the file + const ARArchive::Member *Member = Deb.GotoMember("data.tar.gz"); + if (Member == 0) + return false; + + // Extract it. + ExtractTar Tar(Deb.GetFile(),Member->Size); + if (Tar.Go(*this) == false) + return false; + return true; +} + /*}}}*/ +// ContentsExtract::DoItem - Extract an item /*{{{*/ +// --------------------------------------------------------------------- +/* This just tacks the name onto the end of our memory buffer */ +bool ContentsExtract::DoItem(Item &Itm,int &Fd) +{ + unsigned long Len = strlen(Itm.Name); + + // Strip leading ./'s + if (Itm.Name[0] == '.' && Itm.Name[1] == '/') + { + // == './' + if (Len == 2) + return true; + + Len -= 2; + Itm.Name += 2; + } + + // Allocate more storage for the string list + if (CurSize + Len + 2 >= MaxSize || Data == 0) + { + if (MaxSize == 0) + MaxSize = 512*1024/2; + char *NewData = (char *)realloc(Data,MaxSize*2); + if (NewData == 0) + return _error->Error("realloc - Failed to allocate memory"); + Data = NewData; + MaxSize *= 2; + } + + strcpy(Data+CurSize,Itm.Name); + CurSize += Len + 1; + return true; +} + /*}}}*/ +// ContentsExtract::TakeContents - Load the contents data /*{{{*/ +// --------------------------------------------------------------------- +/* */ +bool ContentsExtract::TakeContents(const void *NewData,unsigned long Length) +{ + if (Length == 0) + { + CurSize = 0; + return true; + } + + // Allocate more storage for the string list + if (Length + 2 >= MaxSize || Data == 0) + { + if (MaxSize == 0) + MaxSize = 512*1024/2; + while (MaxSize*2 <= Length) + MaxSize *= 2; + + char *NewData = (char *)realloc(Data,MaxSize*2); + if (NewData == 0) + return _error->Error("realloc - Failed to allocate memory"); + Data = NewData; + MaxSize *= 2; + } + memcpy(Data,NewData,Length); + CurSize = Length; + + return Data[CurSize-1] == 0; +} + /*}}}*/ +// ContentsExtract::Add - Read the contents data into the sorter /*{{{*/ +// --------------------------------------------------------------------- +/* */ +void ContentsExtract::Add(GenContents &Contents,string Package) +{ + const char *Start = Data; + char *Pkg = Contents.Mystrdup(Package.c_str()); + for (const char *I = Data; I < Data + CurSize; I++) + { + if (*I == 0) + { + Contents.Add(Start,Pkg); + Start = ++I; + } + } +} + /*}}}*/ |