# MIT License
#
# Copyright (c) 2021 Eugenio Parodi <ceccopierangiolieugenio AT googlemail DOT com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
__all__ = ['TTkFileBuffer']
import os
import re
import threading
from TermTk.TTkCore.log import TTkLog
from TermTk.TTkCore.signal import pyTTkSignal
'''
          w1   w3   w2   w5
Buffer  |----|----|----|----|           cache buffer
          |     \   /      \
          |       x         \
          |     /   \        \
Pages   | 0  | 2  | 1  |None| 3  |None| index to buffer
File    |----|----|----|----|----|----| view as list of windows
          w1   w2   w3   w4   w5   w6
'''
class TTkFileBuffer():
    '''Line-oriented, page-cached reader for a text file.

    The file is viewed as a sequence of fixed-size "windows" (pages) of
    ``window`` lines each.  At most ``numWindows`` pages are kept in memory;
    when a new page has to be loaded, the page that was used least recently
    is dropped.

    A background thread started by the constructor scans the file for
    newline bytes and records the byte offset at which every line starts.

    Signals:
        indexUpdated(float): emitted after each indexed chunk with the
            fraction of the file scanned so far, and with ``1.0`` at the end.
        indexed(): emitted once the whole file has been indexed.
    '''

    class _Page:
        '''One cached window: ``size`` decoded lines belonging to page ``page``.'''
        __slots__ = ('_page', '_size', '_buffer')

        def __init__(self, page, size):
            self._page = page
            self._size = size
            # Pre-filled with empty strings; the owner overwrites every slot.
            self._buffer = [""] * self._size

        @property
        def buffer(self):
            '''The mutable list of lines held by this page.'''
            return self._buffer

        @property
        def page(self):
            '''The page number this object caches.'''
            return self._page

    __slots__ = (
        '_indexes', '_indexesMutex',
        '_filename', '_fd',
        '_pages', '_buffer',
        '_window', '_numW',
        '_width',
        # Signals
        'indexUpdated', 'indexed')

    def __init__(self, filename, window, numWindows):
        '''Open ``filename`` and start indexing it in a background thread.

        :param filename: path of the file to read.
        :param window: number of lines per cached page.
        :param numWindows: number of pages kept in the cache.
        '''
        # Signals
        self.indexUpdated = pyTTkSignal(float)
        self.indexed = pyTTkSignal()
        self._window = window
        self._numW = numWindows
        self._filename = filename
        # Byte offset of the start of every known line; line 0 starts at 0.
        self._indexes = [0]
        self._indexesMutex = threading.Lock()
        self._width = 0
        # Cached pages ordered from least to most recently used.
        self._buffer = [None] * self._numW
        # Page number -> cached _Page, or None when the page is not loaded.
        self._pages = [None]
        # NOTE(review): this text-mode handle is later positioned with byte
        # offsets collected in binary mode by createIndex(); confirm this is
        # acceptable for the encodings this buffer is expected to handle.
        self._fd = open(self._filename, 'r', errors='replace', newline='\n')
        threading.Thread(target=self.createIndex).start()

    def __del__(self):
        # '_fd' is a slot: if open() failed in __init__ it was never assigned,
        # and touching it here would raise AttributeError during finalization.
        fd = getattr(self, '_fd', None)
        if fd is not None:
            fd.close()

    def filename(self):
        '''Return the path of the file backing this buffer.'''
        return self._filename

    def getLen(self):
        '''Return the number of lines indexed so far.'''
        return len(self._indexes)

    def getWidth(self, indexes=None):
        '''Return the longest line length, in bytes, found by the indexer.

        The value is 0 until :meth:`createIndex` has finished.
        ``indexes`` is accepted for interface compatibility and is not used.
        '''
        return self._width

    def getLineDirect(self, line):
        '''Read line ``line`` straight from the file, bypassing the page cache.

        :return: the raw line (line terminator included), or ``""`` when
            ``line`` is beyond the lines indexed so far.
        '''
        if line >= self.getLen():
            return ""
        with self._indexesMutex:
            self._fd.seek(self._indexes[line])
        return self._fd.readline()

    def getLine(self, line):
        '''Return line ``line`` through the page cache.

        Carriage returns are stripped from cached lines.

        :return: the line, or ``""`` when ``line`` is beyond the lines
            indexed so far.
        '''
        if line >= self.getLen():
            return ""
        page, offset = divmod(line, self._window)
        cached = self._pages[page]
        if cached is None:
            # Cache miss: drop the least recently used page (front of the
            # list) and load the requested one in its place.
            evicted = self._buffer.pop(0)
            if evicted is not None:
                self._pages[evicted.page] = None
            cached = self._Page(page, self._window)
            self._pages[page] = cached
            self._buffer.append(cached)
            with self._indexesMutex:
                # line - offset is the first line of the page.
                self._fd.seek(self._indexes[line - offset])
            lines = cached.buffer
            for i in range(self._window):
                # readline() returns "" past EOF, leaving the slot empty.
                lines[i] = self._fd.readline().replace('\r', '')
        else:
            # Cache hit: mark the page as the most recently used one.
            self._buffer.remove(cached)
            self._buffer.append(cached)
        return cached.buffer[offset]

    def getSlice(self, line, length):
        '''Return ``length`` consecutive lines starting at ``line`` as a list.'''
        return [self.getLine(i) for i in range(line, line + length)]

    def createIndex(self):
        '''Scan the file for newline bytes and record where every line starts.

        Runs in the thread started by the constructor.  The file is read in
        binary chunks; after each chunk the new offsets are published under
        the mutex and ``indexUpdated`` is emitted with the progress.  When
        the scan is complete the maximum line width is stored,
        ``indexUpdated(1.0)`` and then ``indexed`` are emitted.
        '''
        fileSize = os.stat(self._filename).st_size
        chunkSize = 0x1000000  # 16 MiB
        offset = 0
        with open(self._filename, 'rb') as infile:
            while (chunk := infile.read(chunkSize)):
                found = []
                start = 0
                while (index := chunk.find(0x0A, start)) != -1:
                    # A new line starts on the byte right after the newline.
                    found.append(index + offset + 1)
                    start = index + 1
                with self._indexesMutex:
                    self._indexes += found
                    # Keep one page slot for every window of indexed lines.
                    self._pages += [None] * (1 + (self.getLen() // self._window) - len(self._pages))
                offset += len(chunk)
                # offset > 0 here; using it as a floor for the denominator
                # avoids a division by zero (and a ratio above 1.0) if the
                # file grew after os.stat() was taken.
                self.indexUpdated.emit(offset / max(fileSize, offset))
        starts = self._indexes
        # default=0 covers files with no newline at all, for which the
        # generator is empty and a bare max() would raise ValueError.
        width = max((starts[i + 1] - starts[i] for i in range(len(starts) - 1)), default=0)
        # Also account for a final line that has no trailing newline.
        self._width = max(width, offset - starts[-1])
        self.indexUpdated.emit(1.0)
        self.indexed.emit()

    def searchRe(self, regex, ignoreCase=False):
        '''Return the numbers of the lines matching the regular expression.

        :param regex: pattern handed to :func:`re.compile`.
        :param ignoreCase: when true, match case-insensitively.
        :return: list of 0-based line numbers, in file order.
        '''
        rr = re.compile(regex, re.IGNORECASE if ignoreCase else 0)
        TTkLog.debug(f"Search RE: {regex}")
        with open(self._filename, 'r', errors='replace', newline='\n') as infile:
            return [i for i, line in enumerate(infile) if rr.search(line)]

    def search(self, txt):
        '''Return the numbers of the lines containing the substring ``txt``.

        :return: list of 0-based line numbers, in file order.
        '''
        with open(self._filename, 'r', errors='replace', newline='\n') as infile:
            return [i for i, line in enumerate(infile) if txt in line]