Source code for TermTk.TTkCore.filebuffer

# MIT License
#
# Copyright (c) 2021 Eugenio Parodi <ceccopierangiolieugenio AT googlemail DOT com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__all__ = ['TTkFileBuffer']

import os
import re
import threading
from TermTk.TTkCore.log import TTkLog
from TermTk.TTkCore.signal import pyTTkSignal

'''
             w1   w3   w2   w5
    Buffer |----|----|----|----|            cache buffer
             |      \ /       \
             |       x         \
             |      / \         \
    Pages  | 0  | 2  | 1  |None| 3  |None|  index to buffer
    File   |----|----|----|----|----|----|  view as list of windows
             w1   w2   w3   w4   w5   w6
'''
[docs] class TTkFileBuffer(): class _Page: __slots__ = ('_page', '_size', '_buffer') def __init__(self, page, size): self._page = page self._size = size self._buffer = [""]*self._size #TTkLog.debug(f"{self._buffer}") @property def buffer(self): return self._buffer @property def page(self): return self._page __slots__ = ( '_indexes', '_indexesMutex', '_filename', '_fd', '_pages', '_buffer', '_window', '_numW', '_width', #Signals 'indexUpdated', 'indexed') def __init__(self, filename, window, numWindows): # Signals self.indexUpdated = pyTTkSignal(float) self.indexed = pyTTkSignal() self._window = window self._numW = numWindows self._filename = filename self._indexes = [0] self._indexesMutex = threading.Lock() self._width=0 self._buffer = [None]*self._numW self._pages = [None] self._fd = open(self._filename, 'r', errors='replace', newline='\n') threading.Thread(target=self.createIndex).start() def __del__(self): self._fd.close()
[docs] def filename(self): return self._filename
[docs] def getLen(self): return len(self._indexes)
[docs] def getWidth(self, indexes=None): return self._width
[docs] def getLineDirect(self, line): if line >= self.getLen(): return "" self._indexesMutex.acquire() self._fd.seek(self._indexes[line]) self._indexesMutex.release() return self._fd.readline()
[docs] def getLine(self, line): if line >= self.getLen(): return "" page = line//self._window offset = line%self._window if self._pages[page] == None: # Dispose of the pages to the bottom dispose = self._buffer.pop(0) if dispose is not None: self._pages[dispose.page] = None self._pages[page] = self._Page(page, self._window) self._buffer.append(self._pages[page]) self._indexesMutex.acquire() self._fd.seek(self._indexes[line-offset]) self._indexesMutex.release() buffer = self._pages[page].buffer for i in range(self._window): buffer[i] = self._fd.readline().replace('\r','') #self._width = max(self._width,len(buffer[i])) else: # Push the page to the top of the buffer i = self._buffer.index(self._pages[page]) p = self._buffer.pop(i) self._buffer.append(p) return self._pages[page].buffer[offset]
[docs] def getSlice(self, line, length): ret = [] for i in range(line, line+length): ret.append(self.getLine(i)) return ret
[docs] def createIndex(self): # TTkLog.debug(f"Start Indexing {self._filename}") indexes = [] offset = 0 fileSize = os.stat(self._filename).st_size chunkSize = 0x1000000 # ~16M with open(self._filename,'rb') as infile: while (chunk:=infile.read(chunkSize)): start = 0 while (index:=chunk.find(0x0A,start))!=-1: indexes.append(index+offset+1) start = index+1 self._indexesMutex.acquire() self._indexes += indexes self._pages += [None]*(1+(self.getLen()//self._window)-(len(self._pages))) self._indexesMutex.release() indexes = [] offset+=len(chunk) self.indexUpdated.emit(offset/fileSize) # TTkLog.debug(f"{self._filename} {offset/fileSize} ...") self._width = max( (self._indexes[i+1]-self._indexes[i]) for i in range(len(self._indexes)-1) ) self.indexUpdated.emit(1.0) self.indexed.emit()
# TTkLog.debug(f"{self._filename} {offset/fileSize} END")
[docs] def searchRe(self, regex, ignoreCase=False): indexes = [] rr = re.compile(regex, re.IGNORECASE if ignoreCase else 0) TTkLog.debug(f"Search RE: {regex}") # from datetime import datetime # now = datetime.now() # TTkLog.debug(f"Time1 {now}") with open(self._filename, 'r', errors='replace', newline='\n') as infile: # for i,line in enumerate(infile): # if rr.search(line): # indexes.append(i) # id = 0 # for line in infile: # ma = rr.search(line) # if ma: # indexes.append(id) # id += 1 # for i,index in enumerate(self._indexes): # infile.seek(index) # rl = infile.readline() # if rr.search(rl): # indexes.append(i) indexes = [i for i,line in enumerate(infile) if rr.search(line)] # TTkLog.debug(f"Time2 {datetime.now()}") # TTkLog.debug(f"Diff: {datetime.now() - now}") return indexes
[docs] def search(self, txt): indexes = [] with open(self._filename, 'r', errors='replace', newline='\n') as infile: for line in infile: if txt in line: indexes.append(id) return indexes