Mhmmm, my previous example was not very clear. Why would that help? TC would still need 4 GB of address space to compare two 2 GB files.
Below is a fully working piece of code that demonstrates the idea. This code computes the sum of all bytes of a (potentially) large file, using only a small buffer. The beginning and the end of this buffer are access-protected, so a buffer underrun/overrun causes an exception. The exception handler slides the buffer along the file contents and then continues counting. Thanks to the use of exceptions, the counting procedure (GetSumOfFileBytes) requires no additional range checking, so it can be relatively fast. Exceptions are slow, but they are raised rarely. Moreover, reading the file contents is much slower. For these two reasons, exceptions have no significant impact on the overall performance. This code was tested with a 4.5 GB file and with only a 64 kB buffer.
In order to compare two files, this code must be extended to handle two buffers. The address of the exception will tell us which buffer must be moved along its file.
Summary: there are two ways of handling large files:
1) Small files are handled as today (fast). Large files are handled with a smaller buffer and with additional range checking (slower).
2) Small files are handled as today (fast). Large files are handled with a smaller buffer and with an exception handler (fast, but the code is a bit more complicated).
The code below also fixes a problem with my previous example (dwPageSize replaced with dwAllocationGranularity). It was tested with Win98 and Win2000, and with Delphi 2, Delphi 5 and BDS 2006.
Code: Select all
{When WIN64 is not defined, declare NativeUInt as an alias of DWord.
 The code below casts pointers to NativeUInt (e.g. NativeUInt(PM)+BufferSize)
 to do address arithmetic on the mapped view.
 NOTE(review): this alias is also declared on 32-bit compilers that may already
 provide their own NativeUInt - confirm this shadowing is intended.}
{$IFNDEF WIN64}
type
NativeUInt = DWord;
{$ENDIF}
{$IFDEF VER90} {Delphi 2}
{Delphi 2 only: support code that remembers the most recent exception record,
 so that the access violation handler can read the faulting address from it.}
threadvar
{Exception record passed to the most recent GetExceptionObject call on this thread}
ExceptionRecord : PExceptionRecord;
var
{Previously installed ExceptObjProc; assigned in Button1Click before the hook is installed}
OrigExceptObjProc : function(P : PExceptionRecord) : Exception;
{Replacement for ExceptObjProc: stores P in the thread-local ExceptionRecord and
 then delegates to the original procedure to create the exception object.
 P      - exception record supplied by the caller of the hook
 Result - whatever OrigExceptObjProc returns for P}
function GetExceptionObject(P : PExceptionRecord) : Exception;
begin
{EAccessViolation is not derived from EExternal in Delphi 2,
so we must save a pointer to the ExceptionRecord}
ExceptionRecord:=P;
Result:=OrigExceptObjProc(P);
end;
{$ENDIF}
{Running state of the byte-sum computation. Button1Click resets these values
 and passes them to GetSumOfFileBytes as var parameters, so the progress made
 so far is still available after an access violation interrupts the counting
 and the file view is remapped.}
var
GlobalSumOfFileBytes : DWord; {Sum of the bytes counted so far}
GlobalBytesToCountHi : DWord; {High 32 bits of the number of bytes still to be counted}
GlobalBytesToCountLo : DWord; {Low 32 bits of the number of bytes still to be counted}
{Adds the 64-bit value (Value2Hi:Value2Lo) to (Value1Hi:Value1Lo) in place.
 Each 64-bit quantity is passed as two 32-bit halves.
   Value1Hi, Value1Lo - accumulator (high / low half), updated on return
   Value2Hi, Value2Lo - addend (high / low half)
 The carry out of the low half is detected by the low sum wrapping around to
 a value smaller than the original low half. This relies on modulo-2^32
 arithmetic, so overflow and range checking are switched off for this routine
 only and restored afterwards; otherwise a project compiled with these checks
 enabled would raise an exception exactly when a carry has to be propagated.}
{$IFOPT Q+}{$DEFINE ADD64_RESTORE_Q}{$ENDIF}
{$IFOPT R+}{$DEFINE ADD64_RESTORE_R}{$ENDIF}
{$Q-}
{$R-}
procedure Add64(var Value1Hi, Value1Lo : DWord; Value2Hi, Value2Lo : DWord);
var
  NewValue1Lo : DWord;
begin
  NewValue1Lo:=Value1Lo+Value2Lo;
  {The low half wrapped around: propagate the carry into the high half}
  if NewValue1Lo < Value1Lo then
    Inc(Value1Hi);
  Value1Lo:=NewValue1Lo;
  Inc(Value1Hi,Value2Hi);
end;
{$IFDEF ADD64_RESTORE_Q}{$Q+}{$UNDEF ADD64_RESTORE_Q}{$ENDIF}
{$IFDEF ADD64_RESTORE_R}{$R+}{$UNDEF ADD64_RESTORE_R}{$ENDIF}
{Subtracts the 64-bit value (Value2Hi:Value2Lo) from (Value1Hi:Value1Lo) in place.
 Each 64-bit quantity is passed as two 32-bit halves.
   Value1Hi, Value1Lo - minuend (high / low half), updated on return
   Value2Hi, Value2Lo - subtrahend (high / low half)
 The borrow out of the low half is detected by the low difference wrapping
 around to a value greater than the original low half. This relies on
 modulo-2^32 arithmetic, so overflow and range checking are switched off for
 this routine only and restored afterwards; otherwise a project compiled with
 these checks enabled would raise an exception exactly when a borrow is needed.}
{$IFOPT Q+}{$DEFINE SUB64_RESTORE_Q}{$ENDIF}
{$IFOPT R+}{$DEFINE SUB64_RESTORE_R}{$ENDIF}
{$Q-}
{$R-}
procedure Sub64(var Value1Hi, Value1Lo : DWord; Value2Hi, Value2Lo : DWord);
var
  NewValue1Lo : DWord;
begin
  NewValue1Lo:=Value1Lo-Value2Lo;
  {The low half wrapped around: take the borrow from the high half}
  if NewValue1Lo > Value1Lo then
    Dec(Value1Hi);
  Value1Lo:=NewValue1Lo;
  Dec(Value1Hi,Value2Hi);
end;
{$IFDEF SUB64_RESTORE_Q}{$Q+}{$UNDEF SUB64_RESTORE_Q}{$ENDIF}
{$IFDEF SUB64_RESTORE_R}{$R+}{$UNDEF SUB64_RESTORE_R}{$ENDIF}
{No additional range checking - no overhead}
{Adds the bytes starting at P to Sum until the 64-bit counter
 (BytesToCountHi:BytesToCountLo) reaches zero.
   Sum            - running sum, accumulated modulo 2^32
   BytesToCountHi - high 32 bits of the number of bytes still to be counted
   BytesToCountLo - low 32 bits of the number of bytes still to be counted
   P              - address of the first byte to read
 The routine does not check the buffer bounds itself. The caller protects the
 pages around the readable window, so reading past it raises an access
 violation. Sum and the counter are var parameters and are modified only
 after the byte has been read successfully, so the caller can remap the file
 and call this routine again to continue where it stopped.
 The sum is meant to wrap around, and the counter is decremented inline with
 a plain zero test for the borrow (no procedure call per byte, and no
 dependency on how DWord values are ordered by comparison operators).
 Overflow and range checking are switched off for this routine only and
 restored afterwards, so enabling them for the project cannot turn the
 intended wrap-around into an exception.}
{$IFOPT Q+}{$DEFINE GETSUM_RESTORE_Q}{$ENDIF}
{$IFOPT R+}{$DEFINE GETSUM_RESTORE_R}{$ENDIF}
{$Q-}
{$R-}
procedure GetSumOfFileBytes(var Sum : DWord; var BytesToCountHi, BytesToCountLo : DWord; P : PByte);
begin
  while (BytesToCountHi <> 0) or (BytesToCountLo <> 0) do
  begin
    {The read of P^ comes first: if it faults, nothing has been updated yet}
    Inc(Sum,P^);
    Inc(P);
    {64-bit decrement by one: borrow from the high half when the low half is zero}
    if BytesToCountLo = 0 then
      Dec(BytesToCountHi);
    Dec(BytesToCountLo);
  end;
end;
{$IFDEF GETSUM_RESTORE_Q}{$Q+}{$UNDEF GETSUM_RESTORE_Q}{$ENDIF}
{$IFDEF GETSUM_RESTORE_R}{$R+}{$UNDEF GETSUM_RESTORE_R}{$ENDIF}
{Demonstration: computes the sum of all bytes of 'test.bin' through a small
 sliding memory-mapped view and shows the result in a message box.

 A view of BufferSize bytes is mapped at the current file offset. Its first
 allocation-granularity block (except at the start of the file) and its last
 one are set to PAGE_NOACCESS. GetSumOfFileBytes reads without any bounds
 checks; when it touches a protected block, the resulting EAccessViolation is
 caught here, the view is moved by BufferDataSize bytes and counting resumes
 from the state kept in the GlobalXxx variables.

 Files smaller than the buffer are not processed at all (the routine exits).
 Raises Exception when a Windows API call fails; access violations that were
 not caused by one of the guard blocks are re-raised unchanged.

 Differences from the straightforward version of this loop:
  - the tail guard is installed (and later removed) only when the view really
    spans BufferSize bytes. The last view of the file may be shorter, and
    changing the protection at PM+BufferSize-granularity would then address
    memory that does not belong to the view;
  - the view is released in exactly one place, which also clears PM, so a
    re-raised exception can no longer lead to a second unprotect/unmap of an
    already released view;
  - a failure to install a guard is reported instead of being ignored.}
procedure TForm1.Button1Click(Sender: TObject);
var
  SystemInfo : TSystemInfo;
  Granularity : DWord;      {Copy of SystemInfo.dwAllocationGranularity}
  BufferSize : DWord;       {Size of a full view: data window + two guard blocks}
  BufferDataSize : DWord;   {Distance the view is moved on each remap}
  ViewSize : DWord;         {Number of bytes requested for the current view}
  HF : THandle;
  HM : THandle;
  PM : Pointer;             {Base address of the current view, nil when none is mapped}
  PM2 : PByte;              {Address at which counting starts in the current view}
  ViewBase : NativeUInt;    {Base address of the view that was active when the fault occurred}
  FaultAddr : NativeUInt;   {Address whose access caused the violation}
  FaultKnown : Boolean;     {True when an exception record was available}
  HeadGuarded : Boolean;    {A guard is currently installed at the start of the view}
  TailGuarded : Boolean;    {A guard is currently installed at the end of the view}
  HadHeadGuard : Boolean;   {HeadGuarded at the time of the fault}
  HadTailGuard : Boolean;   {TailGuarded at the time of the fault}
  FileSizeHi : DWord;
  FileSizeLo : DWord;
  FileOffsetHi : DWord;
  FileOffsetLo : DWord;
  FileSpaceLeftHi : DWord;
  FileSpaceLeftLo : DWord;
  OldProtect : DWord;

  {Removes the guards that were actually installed, unmaps the current view
   and marks it as released. Safe to call when no view is mapped.}
  procedure ReleaseView;
  begin
    if PM <> nil then
    begin
      if HeadGuarded then
        VirtualProtect(PM,Granularity,PAGE_READONLY,@OldProtect); {For Win9x}
      if TailGuarded then
        VirtualProtect(Pointer(NativeUInt(PM)+BufferSize-Granularity),Granularity,PAGE_READONLY,@OldProtect); {For Win9x}
      UnmapViewOfFile(PM);
      PM:=nil;
    end;
    HeadGuarded:=False;
    TailGuarded:=False;
  end;

begin
  {$IFDEF VER90} {Delphi 2}
  if not Assigned(OrigExceptObjProc) then
  begin
    {Instead of here, this should be called once in FormCreate}
    OrigExceptObjProc:=ExceptObjProc;
    ExceptObjProc:=@GetExceptionObject;
  end;
  {$ENDIF}
  {dwAllocationGranularity is 64kB on x86}
  GetSystemInfo(SystemInfo);
  Granularity:=SystemInfo.dwAllocationGranularity;
  BufferDataSize:=1*Granularity; {Place any value instead of 1}
  BufferSize:=BufferDataSize+2*Granularity;
  {PHASE 1: Open file}
  HF:=CreateFile('test.bin',GENERIC_READ,FILE_SHARE_READ,nil,OPEN_EXISTING,0,0);
  if HF = INVALID_HANDLE_VALUE then
    raise Exception.CreateFmt('API call failed with error code %d',[GetLastError]);
  try
    FileSizeLo:=GetFileSize(HF,@FileSizeHi);
    if (FileSizeLo = $FFFFFFFF) and (GetLastError <> ERROR_SUCCESS) then
      raise Exception.CreateFmt('API call failed with error code %d',[GetLastError]);
    if (FileSizeHi = 0) and (FileSizeLo < BufferSize) then
      Exit; {File is smaller than the buffer - normal, simple method is enough - exit}
    {PHASE 2: Create file mapping}
    HM:=CreateFileMapping(HF,nil,PAGE_READONLY,0,0,nil);
    if HM = 0 then
      raise Exception.CreateFmt('API call failed with error code %d',[GetLastError]);
    try
      {PHASE 3: Map view of file}
      PM:=nil;
      HeadGuarded:=False;
      TailGuarded:=False;
      try
        {Initialization}
        GlobalSumOfFileBytes:=0;
        GlobalBytesToCountHi:=FileSizeHi;
        GlobalBytesToCountLo:=FileSizeLo;
        FileOffsetHi:=0;
        FileOffsetLo:=0;
        while True do {In case of touching a guard page, we remap the file and try again}
        try
          FileSpaceLeftHi:=FileSizeHi;
          FileSpaceLeftLo:=FileSizeLo;
          Sub64(FileSpaceLeftHi,FileSpaceLeftLo,FileOffsetHi,FileOffsetLo);
          {Map a full buffer, or only the rest of the file when less is left}
          if (FileSpaceLeftHi = 0) and (FileSpaceLeftLo < BufferSize) then
            ViewSize:=FileSpaceLeftLo
          else
            ViewSize:=BufferSize;
          PM:=MapViewOfFile(HM,FILE_MAP_READ,FileOffsetHi,FileOffsetLo,ViewSize);
          if PM = nil then
            raise Exception.CreateFmt('API call failed with error code %d',[GetLastError]);
          PM2:=PM;
          {Create guard page at the beginning of the buffer (only if not at the beginning of the file)}
          if (FileOffsetHi <> 0) or (FileOffsetLo <> 0) then
          begin
            if not VirtualProtect(PM,Granularity,PAGE_NOACCESS,@OldProtect) then
              raise Exception.CreateFmt('API call failed with error code %d',[GetLastError]);
            HeadGuarded:=True;
            Inc(PM2,Granularity);
          end;
          {Create guard page at the end of the buffer. A shorter view is the last
           piece of the file: the remaining byte counter runs out at its end, so
           no guard is needed there, and the guard address would lie outside the view}
          if ViewSize = BufferSize then
          begin
            if not VirtualProtect(Pointer(NativeUInt(PM)+BufferSize-Granularity),Granularity,PAGE_NOACCESS,@OldProtect) then
              raise Exception.CreateFmt('API call failed with error code %d',[GetLastError]);
            TailGuarded:=True;
          end;
          {Usage of the memory mapped file}
          {Touching guard pages will raise an EAccessViolation}
          GetSumOfFileBytes(GlobalSumOfFileBytes,GlobalBytesToCountHi,GlobalBytesToCountLo,PM2);
          Application.MessageBox(PChar('Finished, sum of file bytes is '+IntToStr(GlobalSumOfFileBytes)),'OK',MB_OK or MB_ICONINFORMATION);
          Exit; {We finished counting sum of bytes}
        except
          on E:EAccessViolation do
          begin
            {Remember where the view was and which guards it had, then release it}
            ViewBase:=NativeUInt(PM);
            HadHeadGuard:=HeadGuarded;
            HadTailGuard:=TailGuarded;
            ReleaseView;
            {We should map another piece of our large file and try again}
            FaultKnown:=False;
            FaultAddr:=0;
            {$IFDEF VER90} {Delphi 2}
            if ExceptionRecord <> nil then
            begin
              FaultAddr:=ExceptionRecord^.ExceptionInformation[1];
              FaultKnown:=True;
            end;
            {$ELSE}
            if E.ExceptionRecord <> nil then
            begin
              FaultAddr:=E.ExceptionRecord^.ExceptionInformation[1];
              FaultKnown:=True;
            end;
            {$ENDIF}
            if FaultKnown then
            begin
              {We touched the guard page at the beginning of the buffer}
              if HadHeadGuard and
                 (FaultAddr >= ViewBase) and
                 (FaultAddr < ViewBase+Granularity) then
              begin
                Sub64(FileOffsetHi,FileOffsetLo,0,BufferDataSize);
                Continue; {Continue counting sum of bytes}
              end;
              {We touched the guard page at the end of the buffer}
              if HadTailGuard and
                 (FaultAddr >= ViewBase+BufferSize-Granularity) and
                 (FaultAddr < ViewBase+BufferSize) then
              begin
                Add64(FileOffsetHi,FileOffsetLo,0,BufferDataSize);
                Continue; {Continue counting sum of bytes}
              end;
            end;
            {Not one of our guards: let the access violation propagate}
            raise;
          end else
            raise;
        end;
        {End of PHASE 3: Map view of file}
      finally
        ReleaseView;
      end;
      {End of PHASE 2: Create file mapping}
    finally
      CloseHandle(HM);
    end;
    {End of PHASE 1: Open file}
  finally
    CloseHandle(HF);
  end;
end;