This version adds a few minor fixes + cutting speed improvement.
well, vdr w/ the recent cUnbufferedFile changes was flushing the data buffers in huge bursts; this was even worse than slowly filling up the caches -- the large (IIRC ~10M) bursts caused latency problems (apps visibly freezing etc).
This patch makes vdr use a much more aggressive disk access strategy. Writes are flushed out almost immediately and the IO is more evenly distributed. While recording and/or replaying, the caches do not grow, and when vdr is done accessing a video file, all cached data from that file is dropped.
I've tested this w/ both local disks and NFS mounted ones, and it seems to do the right thing. Writes get flushed every 1..2s at a rate of .5..1M/s instead of the >10M bursts. For async mounted NFS servers the writes get collected by the NFS server and normally written out. Local disks get an extra feature -- you can use the HD activity LED as a "recording" indicator :^)
As posix_fadvise requires kernel v2.5.60 and glibc v2.2, you'll need at least those versions to see any difference. (w/o posix_fadvise you will not get the fdatasyncs every 10M -- if somebody really wants them, they should be controlled by a config option)
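For what it's worth, a minimal sketch (not part of the patch; the helper name is made up) of how the call could be guarded at build time, falling back to the old fdatasync behaviour on systems without a usable posix_fadvise:

  #include <fcntl.h>
  #include <unistd.h>

  // hypothetical helper: start writeback of [begin,end) without blocking
  // where posix_fadvise exists, otherwise flush synchronously as before
  static void FlushWrittenRange(int fd, off_t begin, off_t end)
  {
  #ifdef POSIX_FADV_DONTNEED
    posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED);
  #else
    fdatasync(fd);
  #endif
  }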
Possible further improvements could be:
switch from POSIX_FADV_SEQUENTIAL to POSIX_FADV_RANDOM, since we're doing manual readahead anyway (or just leave this as is, and drop the readahead) (not using POSIX_FADV_RANDOM is probably one of the causes of the "leaks", so some of the workarounds could then go too)
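As an illustration of the two alternatives (a sketch only, not a patch hunk; the flag name is made up):

  #include <fcntl.h>

  // advise the kernel at open time, depending on who does the readahead
  void AdviseAccessPattern(int fd, bool manualReadahead)
  {
    if (manualReadahead)
       // we issue our own POSIX_FADV_WILLNEED requests, so disable the
       // kernel's readahead to avoid doing the work twice
       posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
    else
       // drop the manual readahead and let the kernel read ahead for
       // the (mostly) sequential access pattern
       posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
  }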
artur
--- vdr-1.3.36.org/cutter.c	2005-10-31 13:26:44.000000000 +0100
+++ vdr-1.3.36/cutter.c	2005-11-18 03:20:50.000000000 +0100
@@ -66,6 +66,7 @@ void cCuttingThread::Action(void)
      toFile = toFileName->Open();
      if (!fromFile || !toFile)
         return;
+     fromFile->setreadahead(MEGABYTE(10));
      int Index = Mark->position;
      Mark = fromMarks.Next(Mark);
      int FileSize = 0;
@@ -90,6 +91,7 @@ void cCuttingThread::Action(void)
            if (fromIndex->Get(Index++, &FileNumber, &FileOffset, &PictureType, &Length)) {
               if (FileNumber != CurrentFileNumber) {
                  fromFile = fromFileName->SetOffset(FileNumber, FileOffset);
+                 fromFile->setreadahead(MEGABYTE(10));
                  CurrentFileNumber = FileNumber;
                  }
               if (fromFile) {
--- vdr-1.3.36.org/tools.c	2005-11-04 17:33:18.000000000 +0100
+++ vdr-1.3.36/tools.c	2005-11-18 20:33:46.000000000 +0100
@@ -851,8 +851,7 @@ bool cSafeFile::Close(void)
 
 // --- cUnbufferedFile -------------------------------------------------------
 
-#define READ_AHEAD MEGABYTE(2)
-#define WRITE_BUFFER MEGABYTE(10)
+#define WRITE_BUFFER KILOBYTE(800)
 
 cUnbufferedFile::cUnbufferedFile(void)
 {
@@ -869,7 +868,15 @@ int cUnbufferedFile::Open(const char *Fi
   Close();
   fd = open(FileName, Flags, Mode);
   begin = end = ahead = -1;
+  readahead = 16*1024;
+  pendingreadahead = 0;
   written = 0;
+  totwritten = 0;
+  if (fd >= 0) {
+     // we really mean POSIX_FADV_SEQUENTIAL, but we do our own readahead
+     // so turn off the kernel one.
+     posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
+     }
   return fd;
 }
@@ -880,10 +887,10 @@ int cUnbufferedFile::Close(void)
         end = ahead;
      if (begin >= 0 && end > begin) {
         //dsyslog("close buffer: %d (flush: %d bytes, %ld-%ld)", fd, written, begin, end);
-        if (written)
+        if (0 && written)
           fdatasync(fd);
-        posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED);
        }
+     posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
      begin = end = ahead = -1;
      written = 0;
      }
@@ -899,35 +906,89 @@ off_t cUnbufferedFile::Seek(off_t Offset
   return -1;
 }
 
+// when replaying and going eg FF->PLAY the position jumps back 2..8M
+// hence we might not want to drop that data at once.
+// Ignoring for now to avoid making this even more complex, but we could
+// at least try to handle the common cases
+// (PLAY->FF->PLAY, small jumps, moving editing marks etc)
+
 ssize_t cUnbufferedFile::Read(void *Data, size_t Size)
 {
   if (fd >= 0) {
      off_t pos = lseek(fd, 0, SEEK_CUR);
-     // jump forward - adjust end position
-     if (pos > end)
-        end = pos;
-     // after adjusting end - don't clear more than previously requested
-     if (end > ahead)
-        end = ahead;
-     // jump backward - drop read ahead of previous run
-     if (pos < begin)
-        end = ahead;
+     off_t jumped = pos-end; // nonzero means we're not at the last offset - some kind of jump happened.
+     if (jumped) {
+        pendingreadahead += ahead-end+KILOBYTE(64);
+        // jumped forward? - treat as if we did read all the way to current pos.
+        if (pos > end) {
+           end = pos;
+           // but clamp at ahead so we don't clear more than previously requested.
+           // (would be mostly harmless anyway, unless we got more than one reader of this file)
+           // add a little extra readahead, JIC the kernel prefethed more than we requested.
+           if (end > (ahead+KILOBYTE(128)))
+              end = ahead+KILOBYTE(128);
+           }
+        // jumped backward? - drop both last read _and_ read-ahead
+        if (pos < begin)
+           end = ahead+KILOBYTE(128);
+        // jumped backward, but still inside prev read window? - pretend we read less.
+        if ((pos >= begin) && (pos < end))
+           end = pos;
+        }
+
+     ssize_t bytesRead = safe_read(fd, Data, Size);
+
+     // now drop all data accesed during _previous_ Read().
      if (begin >= 0 && end > begin)
-        posix_fadvise(fd, begin - KILOBYTE(200), end - begin + KILOBYTE(200), POSIX_FADV_DONTNEED);//XXX macros/parameters???
+        posix_fadvise(fd, begin, end-begin, POSIX_FADV_DONTNEED);
+
      begin = pos;
-     ssize_t bytesRead = safe_read(fd, Data, Size);
      if (bytesRead > 0) {
        pos += bytesRead;
-        end = pos;
        // this seems to trigger a non blocking read - this
        // may or may not have been finished when we will be called next time.
        // If it is not finished we can't release the not yet filled buffers.
        // So this is commented out till we find a better solution.
-        //posix_fadvise(fd, pos, READ_AHEAD, POSIX_FADV_WILLNEED);
-        ahead = pos + READ_AHEAD;
+
+        // Hmm, it's obviously harmless if we're actually going to read the data
+        // -- the whole point of read-ahead is to start the IO early...
+        // The comment above applies only when we jump somewhere else _before_ the
+        // IO started here finishes. How common would that be? Could be handled eg
+        // by posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED) called some time after
+        // we detect a jump. Ignoring this for now. /AS
+
+        // Ugh, it seems to cause some "leaks" at every jump... Either the
+        // brute force approach mentioned above should work (it's not like this is
+        // much different than O_DIRECT) or keeping notes about the ahead reads and
+        // flushing them after some time. the latter seems overkill though, trying
+        // the former...
+
+        //syslog(LOG_DEBUG,"jump: %06ld ra: %06ld size: %ld", jumped, (long)readahead, (long)Size);
+
+        // no jump? also permit small jump still inside readahead window (FF).
+        if (jumped>=0 && jumped<=(off_t)readahead) {
+           if ( readahead <= Size*4 )  // automagically tune readahead size.
+              readahead = Size*4;
+           posix_fadvise(fd, pos, readahead, POSIX_FADV_WILLNEED);
+           ahead = pos + readahead;
+           }
+        else {
+           // jumped - we really don't want any readahead now. otherwise
+           // eg fast-rewind gets in trouble.
+           ahead = pos;
+
+           // flush it all; mostly to get rid of nonflushed readahead coming
+           // from _previous_ jumps. ratelimited.
+           // the accounting is _very_ unaccurate, i've seen ~50M get flushed
+           // when the limit was set to 4M. As long as this triggers after
+           // _some_ jumps we should be ok though.
+           if (pendingreadahead > MEGABYTE(2)) {
+              posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+              pendingreadahead = 0;
+              }
+           }
        }
-     else
-        end = pos;
+     end = pos;
      return bytesRead;
      }
   return -1;
@@ -950,11 +1011,19 @@ ssize_t cUnbufferedFile::Write(const voi
           end = pos + bytesWritten;
        if (written > WRITE_BUFFER) {
           //dsyslog("flush buffer: %d (%d bytes, %ld-%ld)", fd, written, begin, end);
-           fdatasync(fd);
-           if (begin >= 0 && end > begin)
-              posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED);
+           totwritten += written;
+           if (begin >= 0 && end > begin) {
+              off_t headdrop = max((long)begin&~4095,(long)WRITE_BUFFER*2);
+              posix_fadvise(fd, (begin&~4095)-headdrop, end - begin + headdrop, POSIX_FADV_DONTNEED);
+              }
           begin = end = -1;
           written = 0;
+           // the above fadvise() works when recording, but seems to leave cached
+           // data around when writing at a high rate (eg cutting). Hence...
+           if (totwritten > MEGABYTE(20)) {
+              posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+              totwritten = 0;
+              }
           }
        }
     return bytesWritten;
--- vdr-1.3.36.org/tools.h	2005-11-05 11:54:39.000000000 +0100
+++ vdr-1.3.36/tools.h	2005-11-18 03:13:31.000000000 +0100
@@ -209,6 +209,9 @@ private:
   off_t end;
   off_t ahead;
   ssize_t written;
+  ssize_t totwritten;
+  size_t readahead;
+  size_t pendingreadahead;
 public:
   cUnbufferedFile(void);
   ~cUnbufferedFile();
@@ -218,6 +221,7 @@ public:
   ssize_t Read(void *Data, size_t Size);
   ssize_t Write(const void *Data, size_t Size);
   static cUnbufferedFile *Create(const char *FileName, int Flags, mode_t Mode = DEFFILEMODE);
+  void setreadahead(size_t ra) { readahead = ra; };
  };
 
 class cLockFile {
Artur Skawina wrote:
well, vdr w/ the recent cUnbufferedFile changes was flushing the data buffers in huge bursts; this was even worse than slowly filling up the caches -- the large (IIRC ~10M) bursts caused latency problems (apps visibly freezing etc).
Does this freezing apply to local disk access or only to network filesystems? My personal VDR is a system dedicated to VDR usage which uses a local hard disk for storage. So I don't have applications running in parallel to vdr which could freeze, nor can I actually test the behaviour on network devices. It seems you have both of these extra features, so it would be nice to know more about this.
For local usage I found that IO interruptions of less than a second (10 MB burst writes on disks which deliver a hell of a lot more than 10MB/sec) have no negative side effects. But I can imagine that on 10Mbit ethernet it could be hard to have these bursts ... I did not think about this when writing the initial patch ...
This patch makes vdr use a much more aggressive disk access strategy. Writes are flushed out almost immediately and the IO is more evenly distributed. While recording and/or replaying the caches do not grow and when vdr is done accessing a video file all cached data from that file is dropped.
Actually, with the patch you attached my cache _does_ grow. It does not only grow - it displaces the inode cache, which is exactly what the initial patch was created to avoid. To make it worse - when cutting a recording and replaying the newly cut recording at the same time, I get major hangs in replay.
I had a look at your patch - it looked very good. But for whatever reason it doesn't do what it is supposed to do on my VDR. I currently don't know why it doesn't work here for replay - the code there looked good.
I like the heuristics you used to deal with read ahead - but maybe these lead to the leaks I experience here. I will have a look at it. Maybe I can find out something about it ...
I've tested this w/ both local disks and NFS mounted ones, and it seems to do the right thing. Writes get flushed every 1..2s at a rate of .5..1M/s instead of the >10M bursts.
To be honest - I did not find the place where writes get flushed in your patch. posix_fadvise() doesn't seem to influence flushing at all. It only applies to already written buffers. So the normal write strategy is used with your patch - collect data until the kernel decides to write it to disk. This leads to "collect about 300MB" here, followed by a burst of up to 300MB. This is a bit heavier than the 10MB bursts before ;)
Regards Ralf
Ralf Müller wrote:
Artur Skawina wrote:
well, vdr w/ the recent cUnbufferedFile changes was flushing the data buffers in huge bursts; this was even worse than slowly filling up the caches -- the large (IIRC ~10M) bursts caused latency problems (apps visibly freezing etc).
Does this freezing apply to local disk access or only to network filesystems? My personal VDR is a system dedicated to VDR usage which uses a local hard disk for storage. So I don't have applications running in parallel to vdr which could freeze, nor can I actually test the behaviour on network devices. It seems you have both of these extra features, so it would be nice to know more about this.
the freezing certainly applies to NFS -- it shows clearly if you have some kind of monitor app graphing network traffic. It may just be the huge amount of data shifted and the associated cpu load, but the delays are noticeable for non-rt apps running on the same machine. It's rather obvious when eg watching tv using xawtv while recording. As to the local disk case -- i'm not sure of the impact -- most of my vdr data goes over NFS, and this was what made me look at the code. There could be less of a problem w/ local disks, or I simply didn't realize the correlation w/ vdr activity as, unlike for network traffic, i do not have a local IO graph on screen :)
(i _think_ i verified w/ vmstat that local disks were not immune to this, but right now i no longer remember the details, so can't really be sure)
For local usage I found that IO interruptions of less than a second (10 MB burst writes on disks which deliver a hell of a lot more than 10MB/sec) have no negative side effects. But I can imagine that on 10Mbit ethernet it could be hard to have these bursts ... I did not think about this when writing the initial patch ...
it's a problem even on 100mbit -- while the fileserver certainly can accept sustained 10M/s data for several seconds (at least), it's the client, ie vdr-box, that does not behave well -- it sits almost completely idle for minutes (zero network traffic, no writeback at all), and then goes busy for a second or so. I first tried various priority changes, but didn't see any visible improvement. Having vdr running at low prio isn't really an option anyway.
Another issue could be the fsync calls -- at least on ext3 these apparently behave very similarly to sync(2)...
This patch makes vdr use a much more aggressive disk access strategy. Writes are flushed out almost immediately and the IO is more evenly distributed. While recording and/or replaying the caches do not grow and when vdr is done accessing a video file all cached data from that file is dropped.
Actually, with the patch you attached my cache _does_ grow. It does not only grow - it displaces the inode cache, which is exactly what the initial patch was created to avoid. To make it worse - when cutting a recording and replaying the newly cut recording at the same time, I get major hangs in replay.
oh, the cutting-trashes-cache-a-bit isn't really such a big surprise -- i was seeing something like that while testing the code -- I had hoped the extra fadvise every 10M would fix that, but i wanted to get the recording and replay cases right first. (the issue when cutting is simply that we need to: a) start the writeback, and b) drop the cached data after it has hit the disk. The problem is that we don't really know when to do b... For low write rates the heuristic seems to work, for high rates it might fail. Yes, fdatasync obviously will work, but this is the sledgehammer approach :) The fadvise(0,0) solution was a first try at using a slightly smaller hammer. Keeping a dirty-list and flushing it after some time would be the next step if fadvise isn't enough.)
How does the cache behave when _not_ cutting? Over here it looks ok, i've done several recordings while playing back others, and the cache was basically staying the same. (as this is not a dedicated vdr box it is however sometimes hard to be sure)
I had a look at your patch - it looked very good. But for whatever reason it doesn't do what it is supposed to do on my VDR. I currently don't know why it doesn't work here for replay - the code there looked good.
in v1 i was using a relatively small readahead window -- maybe for a slow disk it was _too_ small. In v2 it's a little bigger, maybe that will help (i increased it to make sure the readahead worked for fast-forward, but so far i haven't been able to see much difference). But I don't usually replay anything while cutting, so this hasn't really been tested...
(BTW, with the added readahead in the v2 patch, vdr seems to come close to saturating a 100M connection when cutting. Even when _both_ the source and destination are on the same NFSv3 mounted disk, which kind of surprised me. The LocalDisk->NFS rate and v/v seems to be limited by the network. I didn't check localdisk->localdisk (lack of sufficient disk space). Didn't do any real benchmarking, these are estimates based on observing the rate at which free disk space decreases and on network traffic)
I like the heuristics you used to deal with read ahead - but maybe these lead to the leaks I experience here. I will have a look at it. Maybe I can find out something about it ...
Please do, I did and posted this to get others to look at that code and hopefully come up w/ a strategy which works for everyone. For cutting I was going to switch to O_DIRECT, until i realized we then would still need a fallback strategy, for old kernels and NFS...
The current vdr behavior isn't really acceptable -- at the very least the fsyncs have to be configurable -- even a few hundred megabytes needlessly dirtied by vdr is still much better than the bursts of traffic, disk and cpu usage. I personally don't mind the cache trashing so much; it would be enough to keep vdr happily running in the background without disturbing other tasks. (one of the reasons is that while keeping the recording list in cache seems to help local disks, it doesn't really help for NFS -- you still get lots of NFS traffic every time vdr decides to reread the directory structure. As both the client and server could fit the dir tree in ram the limiting factor becomes the network latency)
I've tested this w/ both local disks and NFS mounted ones, and it seems to do the right thing. Writes get flushed every 1..2s at a rate of .5..1M/s instead of the >10M bursts.
To be honest - I did not find the place where writes get flushed in your patch. posix_fadvise() doesn't seem to influence flushing at all.
Hmm, what glibc/kernel? It works here w/ glibc-2.3.90 and linux-2.6.14.
Here's "vmstat 1" output; vdr (patched 1.3.36) is currently doing a recording to local disk:
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
 1  0   9168 202120    592  22540    0    0     0     0 3584  1350  0  1 99  0
 0  0   9168 202492    592  22052    0    0     0   800 3596  1330  1  0 99  0
 0  0   9168 202368    592  22356    0    0     0     0 3576  1342  1  0 99  0
 0  0   9168 202492    592  21836    0    0     0   804 3628  1350  0  0 100 0
 0  0   9168 202492    592  22144    0    0     0     0 3573  1346  1  1 98  0
 0  0   9168 202244    592  22452    0    0     0     0 3629  1345  1  0 99  0
 1  0   9168 202492    592  21956    0    0     0   800 3562  1350  0  0 100 0
 0  0   9168 202368    592  22260    0    0     0     0 3619  1353  1  0 99  0
 0  0   9168 202120    592  22568    0    0     0     0 3616  1357  1  1 98  0
 0  0   9168 202492    592  22044    0    0     0   952 3617  1336  0  0 100 0
 0  0   9168 202368    596  22352    0    0     0     0 3573  1356  1  0 99  0
 1  0   9168 202616    596  21724    0    0     0   660 3609  1345  0  0 100 0
 0  0   9168 202616    596  22000    0    0     0     0 3569  1338  1  1 98  0
 0  0   9168 202368    596  22304    0    0     0     0 3573  1335  1  0 99  0
 1  0   9168 202492    596  21956    0    0     0   896 3644  1360  0  1 99  0
 0  0   9168 202492    596  22232    0    0     0     0 3592  1327  1  0 99  0
 0  0   9168 202120    596  22536    0    0     0     0 3571  1333  0  0 100 0
 0  0   9168 202616    596  21968    0    0     0   800 3575  1329 11  3 86  0
 0  0   9168 202368    596  22244    0    0     0     0 3604  1350  1  0 99  0
 0  0   9168 202492    596  21756    0    0     0   820 3585  1326  0  1 99  0
 0  0   9168 202492    612  22060    0    0     8   140 3632  1369  1  1 89  9
 0  0   9168 202244    612  22336    0    0     0     0 3578  1328  1  0 99  0
 0  0   9168 202492    612  21796    0    0     0   784 3619  1360  0  0 100 0
 0  0   9168 202492    628  22072    0    0     8   104 3559  1317  2  0 96  2
 0  0   9168 202244    632  22376    0    0     0     0 3604  1348  1  0 99  0
 0  0   9168 202492    632  21904    0    0     0   800 3695  1402  0  0 100 0
 0  0   9168 202368    632  22180    0    0     0     0 3775  1456  1  1 98  0
 0  0   9168 202120    632  22484    0    0     0     0 3699  1416  0  1 99  0
 0  0   9168 202492    632  21992    0    0     0   804 3774  1465  1  0 99  0
 1  0   9168 202236    632  22268   32    0    32     0 3810  1570  3  1 93  3
 0  0   9168 202360    632  21776    0    0     0   820 3896  1690  1  1 98  0
the 'bo' column shows the writeout caused by vdr. Also note the 'free' and 'cache' fields fluctuate a bit, but do not grow. Hmm, now i notice the slowly growing 'buff' -- is this causing you problems? I didn't mind it here, as there's clearly plenty of free RAM around. Will have to investigate what happens under some memory pressure.
Are you saying you don't get any writeback activity w/ my patch?
With no posix_fadvise and no fdatasync calls in the write path i get almost no writeout, just multi-megabyte bursts every minute (probably triggered by the ext3 journal commit (interval set to 60s) and/or memory pressure).
It only applies to already written buffers. So the normal write
/usr/src/linux/mm/fadvise.c should contain the implementation of the various fadvise modes in a linux 2.6 kernel. It certainly does trigger writeback here. Both in the local disk case, and on NFS, where it causes a similar traffic pattern.
strategy is used with your patch - collect data until the kernel decides to write it to disk. This leads to "collect about 300MB" here, followed by a burst of up to 300MB. This is a bit heavier than the 10MB bursts before ;)
See vmstat output above. Are you sure you have a working posix_fadvise? If not, that would also explain the hang during playback as no readahead was actually taking place... (to be honest, i don't think that you need any manual readahead at all in a normal-playback situation; especially as the kernel will by default do some. It's only when the disk is getting busier that the benefits of readahead show up. At least this is what i saw here) What happens when you start a replay and then end it? is the memory freed immediately?
Thanks for testing and the feedback.
Regards,
artur
On Monday 21 November 2005 02:15, Artur Skawina wrote:
the freezing certainly applies to NFS -- it shows clearly if you have
Ok - I see.
it's a problem even on 100mbit -- while the fileserver certainly can accept sustained 10M/s data for several seconds (at least), it's the client, ie vdr-box, that does not behave well -- it sits almost completely idle for minutes (zero network traffic, no writeback at all), and then goes busy for a second or so.
But this very much sounds like an NFS problem - and much less like a VDR problem ...
[...] I had hoped the extra fadvise every 10M would fix that, but i wanted to get the recording and replay cases right first. (the issue when cutting is simply that we need to: a) start the writeback, and b) drop the cached data after it has hit the disk. The problem is that we don't really know when to do b...
That's exactly the problem here ... without special force my kernel seems to prefer to use memory instead of disk ...
For low write rates the heuristic seems to work, for high rates it might fail. Yes, fdatasync obviously will work, but this is the sledgehammer approach :)
I know. I also don't like this approach. But at least it worked (here).
The fadvise(0,0) solution was a first try at using a slightly smaller hammer. Keeping a dirty-list and flushing it after some time would be the next step if fadvise isn't enough.)
How do you know what is still dirty in case of writes?
How does the cache behave when _not_ cutting? Over here it looks ok, i've done several recordings while playing back others, and the cache was basically staying the same. (as this is not a dedicated vdr box it is however sometimes hard to be sure)
With the active read ahead I even have leaks when only reading - the non-blocking reads initiated by the WILLNEED advice seem to keep pages in the buffer cache.
in v1 i was using a relatively small readahead window -- maybe for a slow disk it was _too_ small. In v2 it's a little bigger, maybe that will help (i increased it to make sure the readahead worked for fast-forward, but so far i haven't been able to see much difference). But I don't usually replay anything while cutting, so this hasn't really been tested...
My initial intention when trying to use an active read ahead was to have no hangs even when another disk needs to spin up. On my system I sometimes have this problem and it is annoying. So a read ahead of several megabytes would be needed here - but even without such a huge read ahead I get these annoying leaks here. For normal operation (replay) they could be avoided by increasing the region which has to be cleared to at least the size of the read ahead.
(BTW, with the added readahead in the v2 patch, vdr seems to come close to saturating a 100M connection when cutting. Even when _both_ the source and destination are on the same NFSv3 mounted disk, which kind of surprised me. The LocalDisk->NFS rate and v/v seems to be limited by the network. I didn't check localdisk->localdisk (lack of sufficient disk space). Didn't do any real benchmarking, these are estimates based on observing the rate at which free disk space decreases and on network traffic)
Cool!
The current vdr behavior isn't really acceptable -- at the very least the fsyncs have to be configurable -- even a few hundred megabytes needlessly dirtied by vdr is still much better than the bursts of traffic, disk and cpu usage. I personally don't mind the cache trashing so much; it would be enough to keep vdr happily running in the background without disturbing other tasks.
Depends on the use case. You are absolutely right in the NFS case. In the "dedicated to VDR standalone" case this is different. By throwing away the inode cache it makes the use of big recording archives uncomfortable - it takes up to 20 seconds to scan my local recordings directory. That's a long time when you just want to select a recording ...
To be honest - I did not find the place where writes get flushed in your patch. posix_fadvise() doesn't seem to influence flushing at all.
Hmm, what glibc/kernel? It works here w/ glibc-2.3.90 and linux-2.6.14.
SuSE 9.1:
GNU C Library stable release version 2.3.3 (20040405)
Kernel 2.6.14
Here's "vmstat 1" output; vdr (patched 1.3.36) is currently doing a recording to local disk:
procs -----------memory---------- ---swap-- -----io---- ... [ ... ]
the 'bo' column shows the writeout caused by vdr. Also note the 'free' and 'cache' fields fluctuate a bit, but do not grow. Hmm, now i notice the slowly growing 'buff' -- is this causing you problems?
I don't think so - this would not fill my RAM in the next weeks ;) I usually have 300MB left on the box (yes - it has quite a lot of memory for just a VDR ... )
I didn't mind this here, as there's clearly plenty of free RAM around. Will have to investigate what happens under some memory pressure.
As I said - at least here there is no pressure.
Are you saying you don't get any writeback activity w/ my patch?
Correct. It starts writing back when memory is filled. Not a single second earlier.
With no posix_fadvise and no fdatasync calls in the write path i get almost no writeout, just multi-megabyte bursts every minute (probably triggered by the ext3 journal commit (interval set to 60s) and/or memory pressure).
Using reiserfs here. I remember having configured it for lazy disk operations ... maybe this is the source of the above results. The idea was to collect system writes - to not spin up the disks if not absolutely necessary. But this obviously also results in collecting VDR writes ... anyway I think this is a valid case too. At least for dedicated "multimedia" stations ... A bit more control over VDR IO would be a great thing to have.
It only applies to already written buffers. So the normal write
/usr/src/linux/mm/fadvise.c should contain the implementation of the various fadvise modes in a linux 2.6 kernel. It certainly does trigger writeback here. Both in the local disk case, and on NFS, where it causes a similar traffic pattern.
Will have a look at the code.
See vmstat output above. Are you sure you have a working posix_fadvise?
Quite sure - the current VDR version is performing perfectly well - within its limit.
If not, that would also explain the hang during playback as no readahead was actually taking place... (to be honest, i don't think that you need any manual readahead at all in a normal-playback situation; especially as the kernel will by default do some. It's only when the disk is getting busier that the benefits of readahead show up. At least this is what i saw here)
Remember - you switched off read ahead: POSIX_FADV_RANDOM ;)
Anyway - it seems the small read ahead in your patch didn't have the slightest chance against the multi-megabyte write back triggered when the buffer cache was at its limits.
What happens when you start a replay and then end it? is the memory freed immediately?
I will have a look at it again.
Thanks a lot for working on the problem.

Regards
Ralf
Ralf Müller wrote:
On Monday 21 November 2005 02:15, Artur Skawina wrote:
client, ie vdr-box, that does not behave well -- it sits almost completely idle for minutes (zero network traffic, no writeback at all), and then goes busy for a second or so.
But this very much sounds like an NFS problem - and much less like a VDR problem ...
this is perfectly normal behavior; it's the same as for the local disk case. The problem is that since the vdr box isn't under any memory pressure it collects all the writes. If not for the fdatasyncs it would start writing the data out asynchronously after some time, when it needed some RAM or had too many dirty pages. The problem is that vdr does not let it do that -- after 10M it asks the system to commit all the data to disk and return status. So the box does just that -- flushes the data as fast as possible in order to complete the synchronous request. This is where fadvise(DONTNEED) helps -- it tells the system that we're not going to access the written data any time soon, so it starts committing that buffered data back to disk immediately. Just as it would if it was under memory pressure, except now there is none; and once the data gets to disk it no longer needs to be treated as dirty and can be easily freed.
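To make the difference concrete, a tiny sketch (the function names are mine, not vdr's): both variants get the data onto the disk, but only the first one blocks the caller until it is there:

  #include <fcntl.h>
  #include <unistd.h>

  // burst/blocking: commit everything written so far and wait for it
  void FlushBlocking(int fd)
  {
    fdatasync(fd);   // returns only when the data is on the disk
  }

  // streaming/non-blocking: tell the kernel the range won't be accessed again;
  // it starts writing the dirty pages back right away and can free them once
  // they are clean -- the caller never waits
  void StartWriteback(int fd, off_t begin, off_t end)
  {
    posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED);
  }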
[...] I had hoped the extra fadvise every 10M would fix that, but i wanted to get the recording and replay cases right first. (the issue when cutting is simply that we need to: a) start the writeback, and b) drop the cached data after it has hit the disk. The problem is that we don't really know when to do b...
That's exactly the problem here ... without special force my kernel seems to prefer to use memory instead of disk ...
if you have told it to do exactly that, using that reiserfs setting mentioned below, well, i guess it tries to do its best to obey :)
For low write rates the heuristic seems to work, for high rates it might fail. Yes, fdatasync obviously will work, but this is the sledgehammer approach :)
I know. I also don't like this approach. But at least it worked (here).
The fadvise(0,0) solution was a first try at using a slightly smaller hammer. Keeping a dirty-list and flushing it after some time would be the next step if fadvise isn't enough.)
How do you know what is still dirty in case of writes?
The strategy currently is this: after writing some data to the file (~1M) we use fadvise to make the kernel start writing it to disk; after some time we call fadvise on the same data _again_, and by then it has hopefully already hit the disk, is clean and will be dropped. (I actually call fadvise three, not two, times just to be sure). This seems to work fine for slow sequential writes, such as when recording; for cutting we create the dirty data faster than it can be written back to disk - this is where the global fadvise(DONTNEED) was supposed to help, and in the few cutting tests i did it seemed to be enough.
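Roughly like this, as a simplified sketch (lastBegin/lastEnd are hypothetical variables remembering the previously advised range; the real bookkeeping in the patch lives in Write() and Close()):

  #include <fcntl.h>

  // called after every ~1M of writes covering [begin,end)
  void AdviseWrittenChunk(int fd, off_t begin, off_t end,
                          off_t &lastBegin, off_t &lastEnd)
  {
    // first advice for this chunk: the kernel starts writing it back
    posix_fadvise(fd, begin, end - begin, POSIX_FADV_DONTNEED);

    // advise the previous chunk again: by now it has hopefully hit the disk,
    // is clean, and this time the pages actually get dropped
    if (lastBegin >= 0 && lastEnd > lastBegin)
       posix_fadvise(fd, lastBegin, lastEnd - lastBegin, POSIX_FADV_DONTNEED);

    lastBegin = begin;
    lastEnd   = end;
  }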
How does the cache behave when _not_ cutting? Over here it looks ok, i've done several recordings while playing back others, and the cache was basically staying the same. (as this is not a dedicated vdr box it is however sometimes hard to be sure)
With the active read ahead I even have leaks when only reading - the non-blocking reads initiated by the WILLNEED advice seem to keep pages in the buffer cache.
maybe another reiserfs issue? does it occur when sequentially reading, ie on normal playback? Or only when also seeking around in the file? In the latter case i was seeing some small leaks too, that was the reason for the fadvise calls every X jumps.
My initial intention when trying to use an active read ahead was to have no hangs even when another disk needs to spin up. On my system I sometimes have this problem and it is annoying. So a read ahead of several megabytes would be needed here - but even without such a huge read ahead I get these annoying leaks here. For normal operation
hmm, the readahead is only per-file -- do you have filesystems spanning several disks, _some_ of which are spun down?
(replay) they could be avoided by increasing the region which has to be cleared to at least the size of the read ahead.
Isn't this exactly what is currently happening (both w/o and with my patch)?
The current vdr behavior isn't really acceptable -- at the very least the fsyncs have to be configurable -- even a few hundred megabytes needlessly dirtied by vdr is still much better than the bursts of traffic, disk and cpu usage. I personally don't mind the cache trashing so much; it would be enough to keep vdr happily running in the background without disturbing other tasks.
Depends on the use case. You are absolutely right in the NFS case. In the "dedicated to VDR standalone" case this is different. By throwing
A config option "Write strategy: NORMAL|STREAMING|BURST" would be enough for everyone :) (where STREAMING is what my patch does, at least here, BURST is with the fdatasyncs followed by fadvice(WONTNEED), and normal is w/o both)
away the inode cache it makes usage of big recording archives uncomfortable - it takes up to 20 seconds to scan my local recordings directory. That's a long time when you just want to select a recording ...
It seemed much longer than 20s here :) Now that vdr caches the list, it's not a big problem anymore.
Are you saying you don't get any writeback activity w/ my patch?
Correct. It starts writing back when memory is filled. Not a single second earlier.
With no posix_fadvice and no fdatasync calls in the write path i get almost no writeout with multi-megabyte bursts every minute (triggered probably by ext3 journal commit (interval set to 60s) and/or memory pressure).
Using reiserfs here. I remember having configured it for lazy disk operations ... maybe this is the source of the above results. The idea was to collect system writes - to not spin up the disks if not absolutely necessary. But this obviously also results in collecting VDR writes ... anyway I think this is a valid case too. At least for dedicated "multimedia" stations ... A bit more control over VDR IO would be a great thing to have.
reiserfs collecting all writes would explain the behavior; whether it's a good thing or not in this scenario i'm not sure. Apparently this does not give you any way to force disk writes, other than a synchronous flush (ie fdatasync)?...
i don't think that you need any manual readahead at all in a normal-playback situation; especially as the kernel will by default do some. It's only when the disk is getting busier that the benefits of readahead show up. At least this is what i saw here)
Remember - you switched off read ahead: POSIX_FADV_RANDOM ;)
Just before posting v2 :) Most tests were w/ POSIX_FADV_SEQUENTIAL, but as we do the readahead manually i decided to see if the kernel wasn't interfering too much. So far i haven't seen much difference. What did not work was having a large unconditional readahead -- this fails spectacularly w/ fast-rewind.
Anyway - it seems the small read ahead in your patch didn't have the slightest chance against the multi-megabyte write back triggered when the buffer cache was at its limits.
well, yes, the readahead is adjusted to the write rate :)
However, one thing that could make a large difference is hardware. I have two local ATA disks in the vdr machine, both seagates, one older 80G and a newer 40G (came w/ the machine, i was too lazy to pull it out so it stayed there). Both are alone on an IDE channel, both have 2M cache, both are AFAICT identically configured, both have an ext3 fs. However the 40G disk is significantly slower, and the difference is huge -- you can easily tell when vdr starts using that disk, because the increase in latency for unrelated read requests is so large. OTOH the 80G disk seems not only way faster, but also much more fair to random read requests while writes are going on. Weird.
Regards,
artur
Ralf Müller wrote:
On Monday 21 November 2005 02:15, Artur Skawina wrote:
...
Are you saying you don't get any writeback activity w/ my patch?
Correct. It starts writing back when memory is filled. Not a single second earlier.
Under normal default circumstances, all dirty data should be synced to disk within 30 seconds.
This reminds me of a recent LKML thread. It noted that under some circumstances the kernel seems to forget to flush the dirty data. It didn't come to any particular conclusion but maybe there is a problem in the kernel. It might be an interesting read for you...
http://www.uwsg.iu.edu/hypermail/linux/kernel/0511.1/2043.html
Jon
Artur Skawina wrote:
This version adds a few minor fixes + cutting speed improvement. ...
When I switched to kernel 2.6 for the first time I noticed these issues, too. Selecting the CFQ I/O scheduler in the kernel solved all problems for me. Did you try that?
Oliver
Oliver Endriss wrote:
When I switched to kernel 2.6 for the first time I noticed these issues, too. Selecting the CFQ I/O scheduler in the kernel solved all problems for me. Did you try that?
yes, i use cfq too.
$ cat /sys/block/hd?/queue/scheduler
noop anticipatory deadline [cfq]
noop anticipatory deadline [cfq]
$ cat /sys/block/hd?/queue/read_ahead_kb
4096
4096
$
Hello,
On Tue, 22 Nov 2005 01:33:22 +0100 Oliver Endriss o.endriss@gmx.de wrote:
| Selecting the CFQ I/O scheduler in the kernel solved all problems
| for me. Did you try that?
Since my VDR is in the living room, i recently switched to a 100% diskless solution and until now i was having regular freezes when recording a channel and playing back a divx at the same time. (system is linux 2.6.13.2/Diskless based Debian Sid/PIII 1Ghz/VDR 1.3.36)
I, indeed, forgot to add "elevator=cfq" to the boot parameters (which i use on about all my other workstations/servers :), and up to now, it has definitely improved things: no more regular freezes (about every 10/12 sec), at least with the few channels/divx combinations i know there used to be problems with.
So all in all, thx :) You made my day :)
Truly yours,
Philippe
Philippe Gramoullé wrote:
Oliver Endriss o.endriss@gmx.de wrote:
| Selecting the CFQ I/O scheduler in the kernel solved all problems
| for me. Did you try that?
Since my VDR is in the living room, i recently switched to a 100% diskless solution and until now i was having regular freezes when recording a channel and playing back a divx at the same time. (system is linux 2.6.13.2/Diskless based Debian Sid/PIII 1Ghz/VDR 1.3.36)
I, indeed, forgot to add "elevator=cfq" to the boot parameters (which i use on about all my other workstations/servers :), and up to now, it has definitely improved things: no more regular freezes (about every 10/12 sec), at least with the few channels/divx combinations i know there used to be problems with.
if your VDR really is 100% diskless how can the IO scheduler (which controls access to block devices) make any difference?
artur