this time with a new approach to read caching. Should make watching and editing recordings on a non-idle (and/or slow) machine more comfortable.
The difference to previous versions (and stock fadvise-enabled vdr) is that previously, read data was almost immediately forgotten after it was used; now a certain amount of recently accessed data (at most ~16M) is kept around. This means that short seeks (jumps) when replaying do not cause disk accesses. Things like switching play mode, FF, setting and moving editing marks shouldn't usually block waiting for disk IO. The changes are most noticeable when eg. several recordings are happening in the background.
I did very little testing, treat this as beta quality at best. Seems to work ok, but i won't probably have time to test it further for a few days; maybe somebody wants to play w/ this, or even better take a look at the Read() path...
artur
--- vdr-1.3.39.org/cutter.c 2005-10-31 12:26:44.000000000 +0000 +++ vdr-1.3.39/cutter.c 2006-01-28 21:33:39.000000000 +0000 @@ -66,6 +66,8 @@ void cCuttingThread::Action(void) toFile = toFileName->Open(); if (!fromFile || !toFile) return; + fromFile->SetReadAhead(MEGABYTE(20)); + toFile->SetReadAhead(MEGABYTE(20)); int Index = Mark->position; Mark = fromMarks.Next(Mark); int FileSize = 0; @@ -90,6 +92,7 @@ void cCuttingThread::Action(void) if (fromIndex->Get(Index++, &FileNumber, &FileOffset, &PictureType, &Length)) { if (FileNumber != CurrentFileNumber) { fromFile = fromFileName->SetOffset(FileNumber, FileOffset); + fromFile->SetReadAhead(MEGABYTE(20)); CurrentFileNumber = FileNumber; } if (fromFile) { @@ -118,10 +121,11 @@ void cCuttingThread::Action(void) break; if (FileSize > MEGABYTE(Setup.MaxVideoFileSize)) { toFile = toFileName->NextFile(); - if (toFile < 0) { + if (!toFile) { error = "toFile 1"; break; } + toFile->SetReadAhead(MEGABYTE(20)); FileSize = 0; } LastIFrame = 0; @@ -158,10 +162,11 @@ void cCuttingThread::Action(void) cutIn = true; if (Setup.SplitEditedFiles) { toFile = toFileName->NextFile(); - if (toFile < 0) { + if (!toFile) { error = "toFile 2"; break; } + toFile->SetReadAhead(MEGABYTE(20)); FileSize = 0; } } --- vdr-1.3.39.org/tools.c 2006-01-15 14:31:45.000000000 +0000 +++ vdr-1.3.39/tools.c 2006-01-29 15:27:10.000000000 +0000 @@ -1040,10 +1040,9 @@ bool cSafeFile::Close(void)
// --- cUnbufferedFile -------------------------------------------------------
-//#define USE_FADVISE +#define USE_FADVISE
-#define READ_AHEAD MEGABYTE(2) -#define WRITE_BUFFER MEGABYTE(10) +#define WRITE_BUFFER KILOBYTE(800)
cUnbufferedFile::cUnbufferedFile(void) { @@ -1059,8 +1058,20 @@ int cUnbufferedFile::Open(const char *Fi { Close(); fd = open(FileName, Flags, Mode); - begin = end = ahead = -1; + curpos = 0; +#ifdef USE_FADVISE + begin = lastpos = ahead = 0; + cachedstart = 0; + cachedend = 0; + readahead = 128*1024; written = 0; + totwritten = 0; + if (fd >= 0) { + // we could use POSIX_FADV_SEQUENTIAL, but we do our own readahead + // disabling the kernel one. + posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM); + } +#endif return fd; }
@@ -1068,16 +1079,9 @@ int cUnbufferedFile::Close(void) { #ifdef USE_FADVISE if (fd >= 0) { - if (ahead > end) - end = ahead; - if (begin >= 0 && end > begin) { - //dsyslog("close buffer: %d (flush: %d bytes, %ld-%ld)", fd, written, begin, end); - if (written) - fdatasync(fd); - posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED); - } - begin = end = ahead = -1; - written = 0; + if (totwritten) // if we wrote anything make sure the data has hit the disk before + fdatasync(fd); // calling fadvise, as this is our last chance to un-cache it. + posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); } #endif int OldFd = fd; @@ -1085,45 +1089,81 @@ int cUnbufferedFile::Close(void) return close(OldFd); }
-off_t cUnbufferedFile::Seek(off_t Offset, int Whence) -{ - if (fd >= 0) - return lseek(fd, Offset, Whence); - return -1; +// When replaying and going eg FF->PLAY the position jumps back 2..8M +// hence we do not want to drop recently accessed data at once. +// We try to handle the common cases such as PLAY->FF->PLAY, small +// jumps, moving editing marks etc. + +#define FADVGRAN 4096 // AKA fadvise-chunk-size; PAGE_SIZE or + // getpagesize(2) would also work. +#define READCHUNK MEGABYTE(8) + +static inline int fadvise_drop(int fd, off_t offset, off_t len) +{ + // rounding up the window to make sure that + // not PAGE_SIZE-aligned data gets freed. + return posix_fadvise(fd, offset-(FADVGRAN-1), len+(FADVGRAN-1)*2, POSIX_FADV_DONTNEED); } - + ssize_t cUnbufferedFile::Read(void *Data, size_t Size) { if (fd >= 0) { #ifdef USE_FADVISE - off_t pos = lseek(fd, 0, SEEK_CUR); - // jump forward - adjust end position - if (pos > end) - end = pos; - // after adjusting end - don't clear more than previously requested - if (end > ahead) - end = ahead; - // jump backward - drop read ahead of previous run - if (pos < begin) - end = ahead; - if (begin >= 0 && end > begin) - posix_fadvise(fd, begin - KILOBYTE(200), end - begin + KILOBYTE(200), POSIX_FADV_DONTNEED);//XXX macros/parameters??? - begin = pos; + off_t jumped = curpos-lastpos; // nonzero means we're not at the last offset + //dsyslog("READ: %d %08ld, jump: %06ld ra: %06ld size: %ld", fd, curpos, jumped, (long)readahead, (long)Size); + + if ( (cachedstart<cachedend) && ((curpos<cachedstart) || (curpos>cachedend)) ) { + //dsyslog("CACHE JUMPED: %d, %08ld %08ld..%08ld", fd, curpos, cachedstart, cachedend); + fadvise_drop(fd, cachedstart, cachedend-cachedstart); + cachedstart = curpos; + cachedend = curpos; + } + + cachedstart = min(cachedstart, curpos); #endif ssize_t bytesRead = safe_read(fd, Data, Size); #ifdef USE_FADVISE if (bytesRead > 0) { - pos += bytesRead; - end = pos; - // this seems to trigger a non blocking read - this - // may or may not have been finished when we will be called next time. - // If it is not finished we can't release the not yet filled buffers. - // So this is commented out till we find a better solution. - //posix_fadvise(fd, pos, READ_AHEAD, POSIX_FADV_WILLNEED); - ahead = pos + READ_AHEAD; + curpos += bytesRead; + cachedend = max(cachedend, curpos); + + // Read ahead: + // no jump? (allow small forward jump still inside readahead window). + if (jumped>=0 && jumped<=(off_t)readahead) { + // Trigger the readahead IO, but only if we've used at least + // 1/2 of the previously requested area. This avoids calling + // fadvise() after every read() call. + if ((ahead-curpos)<(off_t)(readahead-readahead/2)) { + //dsyslog("WILLNEED: %d, %08ld ra: %06ld", fd, curpos, (long)readahead); + posix_fadvise(fd, curpos, readahead, POSIX_FADV_WILLNEED); + ahead = curpos+readahead; + cachedend = max(cachedend, ahead); + } + if (readahead < Size*32) { // automagically tune readahead size. + readahead = Size*32; + //dsyslog("Readahead for fd: %d increased to %ld", fd, (long)readahead); + } + } + else { + // jumped -> we really don't want any readahead. otherwise eg fast-rewind gets + // in trouble. + ahead = curpos; + } } - else - end = pos; + + if (cachedstart<cachedend) { + if ((curpos-cachedstart)>READCHUNK*2) { + //dsyslog("CACHE TSHRINK: %d, %08ld %08ld..%08ld", fd, curpos, cachedstart, cachedend); + fadvise_drop(fd, cachedstart, (curpos-READCHUNK)-cachedstart); + cachedstart = curpos-READCHUNK; + } + else if ((cachedend>ahead)&&(cachedend-curpos)>READCHUNK*2) { + //dsyslog("CACHE HSHRINK: %d, %08ld %08ld..%08ld", fd, curpos, cachedstart, cachedend); + fadvise_drop(fd, curpos+READCHUNK, cachedend-(curpos+READCHUNK)); + cachedend = curpos+READCHUNK; + } + } + lastpos = curpos; #endif return bytesRead; } @@ -1132,29 +1172,37 @@ ssize_t cUnbufferedFile::Read(void *Data
ssize_t cUnbufferedFile::Write(const void *Data, size_t Size) { + //dsyslog("Unbuffered:Write: fd: %d %8ld..%8ld size: %5ld", fd, curpos, curpos+Size, (long)Size); if (fd >=0) { -#ifdef USE_FADVISE - off_t pos = lseek(fd, 0, SEEK_CUR); -#endif ssize_t bytesWritten = safe_write(fd, Data, Size); #ifdef USE_FADVISE - if (bytesWritten >= 0) { + if (bytesWritten > 0) { + begin = min(begin, curpos); + curpos += bytesWritten; written += bytesWritten; - if (begin >= 0) { - if (pos < begin) - begin = pos; - } - else - begin = pos; - if (pos + bytesWritten > end) - end = pos + bytesWritten; + lastpos = max(lastpos, curpos); if (written > WRITE_BUFFER) { - //dsyslog("flush buffer: %d (%d bytes, %ld-%ld)", fd, written, begin, end); - fdatasync(fd); - if (begin >= 0 && end > begin) - posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED); - begin = end = -1; + //dsyslog("flush buffer: %d (%d bytes, %ld-%ld)", fd, written, begin, lastpos); + if (lastpos>begin) { + off_t headdrop = min(begin, WRITE_BUFFER*2L); + posix_fadvise(fd, begin-headdrop, lastpos-begin+headdrop, POSIX_FADV_DONTNEED); + } + begin = lastpos = max(0L, curpos-4095); + totwritten += written; written = 0; + // The above fadvise() works when writing slowly (recording), but could + // leave cached data around when writing at a high rate (cutting). + // Also, it seems in some setups, the above does not trigger any I/O and + // the fdatasync() call below has to do all the work (reiserfs with some + // kind of write gathering enabled). + // We add 'readahead' to the threshold in an attempt to increase cutting + // speed; it's a tradeoff -- speed vs RAM-used. + if (totwritten>(MEGABYTE(32)+readahead)) { + //fdatasync(fd); + off_t headdrop = min(curpos-totwritten, totwritten*2L); + posix_fadvise(fd, curpos-totwritten-headdrop, totwritten+headdrop, POSIX_FADV_DONTNEED); + totwritten = 0; + } } } #endif --- vdr-1.3.39.org/tools.h 2006-01-08 11:40:37.000000000 +0000 +++ vdr-1.3.39/tools.h 2006-01-29 13:29:51.000000000 +0000 @@ -237,22 +237,41 @@ public: /// cUnbufferedFile is used for large files that are mainly written or read /// in a streaming manner, and thus should not be cached.
+#include <sys/types.h> +#include <sys/mman.h> +#include <unistd.h> + class cUnbufferedFile { private: int fd; + off_t curpos; + off_t cachedstart; + off_t cachedend; off_t begin; - off_t end; + off_t lastpos; off_t ahead; - ssize_t written; + size_t readahead; + size_t written; + size_t totwritten; public: cUnbufferedFile(void); ~cUnbufferedFile(); int Open(const char *FileName, int Flags, mode_t Mode = DEFFILEMODE); int Close(void); - off_t Seek(off_t Offset, int Whence); + off_t Seek(off_t Offset, int Whence) + { + //dsyslog("Seek: fd: %d offs: %ld whence: %d diff: %ld", fd, (long)Offset, Whence, Offset-curpos); + if (Whence == SEEK_SET && Offset == curpos) + return curpos; + + curpos = lseek(fd, Offset, Whence); + return curpos; + } + ssize_t Read(void *Data, size_t Size); ssize_t Write(const void *Data, size_t Size); static cUnbufferedFile *Create(const char *FileName, int Flags, mode_t Mode = DEFFILEMODE); + void SetReadAhead(size_t ra) { readahead = ra; }; };
class cLockFile {
Klaus Schmidinger wrote:
after running w/ it for a week, i'd now say it works; haven't seen any problems that could be related to the Read() code.
Attached is the patch as I would add it for the next version.
Why not the inlined Seeks()? The IO is very inefficient anyway (tiny read/write sizes), having an extra syscall (seek) for every read/write call doesn't help... (yes, it may not be pretty, but it lets the compiler optimize the calls away, w/o touching any callers. Unless you fixed the callers, then ignore this)
Please take a look at the lines marked CLARIFY and let me know if you have any comment on these.
i'll have to rediff this and compare w/ my current version; will post merged version later. Comments below.
i've dropped the toFile->SetReadAhead() hack -- it doesn't change that much when the datasyncs aren't there anyway.
historical reasons -- it used to be 1/4, not 1/2. I went back to 1/2 after switching to the new read caching strategy; previously i was seeing stalls when replaying and eg cutting at the same time. With the new code hopefully 1/2 is enough and reduces the number of fadvise calls.
it is not, at least not here, however i left it commented out in case there are setups where it actually helps -- it's easier to uncomment this one line for testing than to find the right place to insert the call. It's not on by default because fsyncs are relatively expensive; i'd rather avoid them unless absolutely required. (i've updated the comments in this area since, they will be in the merged version)
artur
[for faster responses please cc me; esp. if it's a reply to an older thread -- i almost missed this]
Artur Skawina wrote:
Do you really think that making Seek() inline actually makes things faster? Can you show some hard numbers?
So we could make this
if (ahead - curpos < (off_t)(readahead / 2)) {
then?
Ok. Please make that a diff against version 1.3.41 with vdr-1.3.41-fadvise.diff applied.
[for faster responses please cc me; esp. if it's a reply to an older thread -- i almost missed this]
Don't worry, your response was fast enough ;-)
Klaus
Klaus Schmidinger wrote:
Do you really think that making Seek() inline actually makes things faster? Can you show some hard numbers?
no, i missed the change in tools.c.
merged your changes w/ my current tree. Whitespace was the worst part, other than that the only diffs now are: - privatized FadviseDrop - a few more comments, hopefully making things more obvious - spelling fixes - simplified the readahead calculation - removed unnecessary Write() alignment handling - removed the readahead-grows-write-window hack; if necessary this should be done differently.
patch vs .41 + vdr-1.3.41-fadvise.diff attached.
artur
--- ../vdr-1.3.41-klaus1/tools.c 2006-02-04 17:58:16.000000000 +0100 +++ tools.c 2006-02-04 20:00:21.000000000 +0100 @@ -1133,6 +1133,7 @@ #ifdef USE_FADVISE off_t jumped = curpos-lastpos; // nonzero means we're not at the last offset if ((cachedstart < cachedend) && (curpos < cachedstart || curpos > cachedend)) { + // current position is outside the cached window -- invalidate it. FadviseDrop(cachedstart, cachedend-cachedstart); cachedstart = curpos; cachedend = curpos; @@ -1151,7 +1152,7 @@ // Trigger the readahead IO, but only if we've used at least // 1/2 of the previously requested area. This avoids calling // fadvise() after every read() call. - if (ahead - curpos < (off_t)(readahead - readahead / 2)) {//XXX CLARIFY: isn't this just 'readahead / 2'??? + if (ahead - curpos < (off_t)(readahead / 2)) { posix_fadvise(fd, curpos, readahead, POSIX_FADV_WILLNEED); ahead = curpos + readahead; cachedend = max(cachedend, ahead); @@ -1161,15 +1162,17 @@ } } else - ahead = curpos; // jumped -> we really don't want any readahead. otherwise eg fast-rewind gets in trouble. + ahead = curpos; // jumped -> we really don't want any readahead. otherwise e.g. fast-rewind gets in trouble. }
if (cachedstart < cachedend) { if (curpos - cachedstart > READCHUNK * 2) { + // current position has moved forward enough, shrink tail window. FadviseDrop(cachedstart, curpos - READCHUNK - cachedstart); cachedstart = curpos - READCHUNK; } else if (cachedend > ahead && cachedend - curpos > READCHUNK * 2) { + // current position has moved back enough, shrink head window. FadviseDrop(curpos + READCHUNK, cachedend - curpos + READCHUNK); cachedend = curpos + READCHUNK; } @@ -1193,21 +1196,34 @@ lastpos = max(lastpos, curpos); if (written > WRITE_BUFFER) { if (lastpos > begin) { + // Now do three things: + // 1) Start writeback of begin..lastpos range + // 2) Drop the already written range (by the previous fadvise call) + // 3) Handle nonpagealigned data. + // This is why we double the WRITE_BUFFER; the first time around the + // last (partial) page might be skipped, writeback will start only after + // second call; the third call will still include this page and finally + // drop it from cache. off_t headdrop = min(begin, WRITE_BUFFER * 2L); posix_fadvise(fd, begin - headdrop, lastpos - begin + headdrop, POSIX_FADV_DONTNEED); } - begin = lastpos = max(0L, curpos - (KILOBYTE(4) - 1)); + begin = lastpos = curpos; totwritten += written; written = 0; // The above fadvise() works when writing slowly (recording), but could - // leave cached data around when writing at a high rate (cutting). - // Also, it seems in some setups, the above does not trigger any I/O and - // the fdatasync() call below has to do all the work (reiserfs with some - // kind of write gathering enabled). - // We add 'readahead' to the threshold in an attempt to increase cutting - // speed; it's a tradeoff -- speed vs RAM-used. - if (totwritten > MEGABYTE(32) + readahead) { - //fdatasync(fd);//XXX CLARIFY: so what, is this necessary or not??? + // leave cached data around when writing at a high rate, e.g. when cutting, + // because by the time we try to flush the cached pages (above) the data + // can still be dirty - we are faster than the disk I/O. + // So we do another round of flushing, just like above, but at larger + // intervals -- this should catch any pages that couldn't be released + // earlier. + + if (totwritten > MEGABYTE(32)) { + // It seems in some setups, fadvise() does not trigger any I/O and + // a fdatasync() call would be required do all the work (reiserfs with some + // kind of write gathering enabled), but the syncs cause (io) load.. + // Uncomment the next line if you think you need them. + //fdatasync(fd); off_t headdrop = min(curpos - totwritten, totwritten * 2L); posix_fadvise(fd, curpos - totwritten - headdrop, totwritten + headdrop, POSIX_FADV_DONTNEED); totwritten = 0; --- ../vdr-1.3.41-klaus1/tools.h 2006-02-04 17:58:16.000000000 +0100 +++ tools.h 2006-02-04 20:01:20.000000000 +0100 @@ -251,13 +251,13 @@ size_t readahead; size_t written; size_t totwritten; + int FadviseDrop(off_t Offset, off_t Len); public: cUnbufferedFile(void); ~cUnbufferedFile(); int Open(const char *FileName, int Flags, mode_t Mode = DEFFILEMODE); int Close(void); void SetReadAhead(size_t ra); - int FadviseDrop(off_t Offset, off_t Len); off_t Seek(off_t Offset, int Whence); ssize_t Read(void *Data, size_t Size); ssize_t Write(const void *Data, size_t Size); --- ../vdr-1.3.41-klaus1/cutter.c 2006-02-04 17:58:16.000000000 +0100 +++ cutter.c 2006-01-30 06:57:07.000000000 +0100 @@ -125,7 +125,6 @@ error = "toFile 1"; break; } - toFile->SetReadAhead(MEGABYTE(20)); FileSize = 0; } LastIFrame = 0; @@ -166,7 +165,6 @@ error = "toFile 2"; break; } - toFile->SetReadAhead(MEGABYTE(20)); FileSize = 0; } }
Artur Skawina art_k@o2.pl wrote:
ups, missed this previously -- ignore my comment wrt to seeks... A function call i can live with :)
how could inlining help about the relative slowness of syscalls? the slow thing about syscalls is not the function call but the jump into kernel space. also, nobody uses syscall (2) directly anyway, instead we have read(), write(), lseek() ... wrappers. did i miss something here?
best regards ... clemens