filetail.py
""" $Id: filetail.py 1512 2011-05-20 16:14:29Z morrissj $
Python3 module for tailing a file such as a system log that grows continuously.
Transparently handles files that get rotated or trucated.
Inspired by the Perl File::Tail module.
A simple algorithm is used to dynamically sleep when no new data is available in
the file. The longer the amount of time goes by w/o new data the longer the
sleep interval will be (up to "max_interval") and starts at "interval".
Example:
from filetail import FileTail
tail = FileTail("/var/log/syslog")
for line in tail:
# do something
pass
"""
import os
import sys
from stat import ST_SIZE
from math import floor
from time import sleep, time
import codecs
class FileTail(object):
"""
    Tail a file, even if it's rotated or truncated.
    Inspired by the Perl module File::Tail.
"""
def __init__(self,
file, # filename to monitor
start_pos="end", # where to initially start reading from
#max_buffer_size=16384, # Max buffer size hint (Not exact; @see file.readlines)
interval=0.1, # sleep time to wait if no data is present (dynamically changes)
#min_interval=0.01, # min sleep time
max_interval=5, # max sleep time
max_wait=60, # max time to wait with no data before reopening file
reopen_check="inode", # how to check if file is different (inode or time) - inode does not work on win32
encoding="utf-8" # file encoding
):
self.start_pos = start_pos
self.reopen_check = reopen_check
self.max_wait = max_wait
#self.max_buffer_size = max_buffer_size
#self.min_interval = min_interval
self.max_interval = max_interval
self.interval = interval
if self.interval > self.max_interval:
self.interval = self.max_interval
self.encoding = encoding
# will throw exception if it fails... caller should intercept
self.open(file, start_pos=start_pos)
# initialize some internal vars
self._buffer = []
self.last_time = time()
self.last_count = 0
def open(self, file, start_pos="head"):
"""Open the file to tail and initialize our state."""
        fh = codecs.open(file, "r", self.encoding)
# seek to the initial position in the file we want to start reading
if start_pos == "end" or start_pos == "tail":
fh.seek(0, os.SEEK_END) # End of file
elif start_pos == "start" or start_pos == "head":
#fh.seek(0, os.SEEK_SET) # Beginning of file
pass
elif start_pos is not None:
if start_pos >= 0: # Absolute position
fh.seek(start_pos, os.SEEK_SET)
else: # Absolute position (from end)
fh.seek(abs(start_pos), os.SEEK_END)
# if we passed the end of the file rewind to the actual end.
# This avoids a potential race condition if the file was being rotated
# in the process of opening the file. Not sure if this can actually
# happen, but better safe than sorry.
        pos = fh.tell()
        if pos > os.stat(file)[ST_SIZE]:
            fh.seek(0, os.SEEK_END)
            pos = fh.tell()
self.fh = fh
self.pos = pos
self.stat = os.fstat(fh.fileno())
self.file = file
def reopen(self):
"""
Attempt to reopen the current file. If it doesn't appear to have
changed (been rotated) then the current file handle is not changed.
"""
#print("Reopening", self.file, "...", end="")
# if we don't have an opened file already then try to open it now
if not self.fh or self.fh.closed:
try:
                self.open(self.file, start_pos="head")
except IOError:
return False
return True
# save current values
fh = self.fh
pos = self.pos
cur = self.stat
# reopen same file
try:
self.open(self.file, "head")
        except IOError:
#print("FILE DOES NOT EXIST")
return False
new = self.stat
#print(new.st_ino, ' == ', cur.st_ino)
if (
(self.reopen_check == 'inode' and new.st_ino == cur.st_ino)
or
(self.reopen_check == 'time' and new.st_mtime <= floor(self.last_time) and new.st_size == pos)
):
#print("FILE NOT CHANGED")
# file appears to be the same or older than our last read
#self.last_time = new.st_mtime
self.fh = fh
self.pos = pos
self.stat = cur
return False
#print("NEW FILE")
return True
def __iter__(self):
"""
Return iterator to support:
            for line in filetail:
                print(line)
"""
self.wait_count = 0
return self
def __next__(self):
"""Interator "next" call."""
return self.next()
    def next(self):
        """Return the next available line, calling wait() until new data arrives."""
        line = None
self.wait_count = 0
        # low CPU (probably the same as the commented block below, but allows tell())
while not line:
line = self.fh.readline()
if line != "":
# track the time we received new data and how much
self.last_time = time()
self.last_count = 1
else:
self.wait()
## uses the least amount of CPU, but does not allow me to tell()
## is that a bug in readlines()?
#while len(self._buffer) == 0:
# self._buffer = self.fh.readlines(self.max_buffer_size)
# if len(self._buffer) > 0:
# # track the time we received new data and how much
# self.last_time = time()
# self.last_count = len(self._buffer)
# self.wait_count = 0
# else:
# self.wait()
#line = self._buffer.pop(0)
# dealing with the file as binary isn't working as well as i hoped
#while len(self.lines) == 0:
# buffer = self.fh.read(self.max_buffer_size).decode(self.encoding)
# if buffer is not None:
# self._buffer += buffer
# size = self.enqueue(self._buffer)
# if size:
# self._buffer = self._buffer[size:]
# else:
# self.wait()
#line = self.lines.pop(0)
# uses too much CPU!! (but not 100%)
#line = self.fh.readline()
#while line == "":
# self.wait()
# line = self.fh.readline()
# if line != "":
# # track the time we received new data and how much
# self.pos = self.fh.tell()
# self.last_time = time()
# self.last_count = 1 #len(self._buffer)
# self.wait_count = 0
return line
#def enqueue(self, buffer):
# """
# Extract any lines from buffer and add to our self.lines list. Ignores
# the last line if it does not have a line termination ("\n")
# @return total characters extracted from buffer.
# """
# lines = buffer.splitlines(True)
# total = 0;
# for l in lines:
# if l.endswith("\n"):
# self.lines.append(l)
# total += len(l)
# return total
# wait for X seconds. The sleep interval is dynamically predicted based on
# how much was previously read. The predicted interval will never be more
# than max_interval. If enough time passes w/o any new data the file will
# be reopened and checked.
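    # Illustrative example with the default settings above: if the last line arrived
    # 0.5s ago, wait() sleeps roughly 0.5s; after 3s of silence it sleeps roughly 3s;
    # once more than max_interval (5s) has elapsed every sleep is capped at 5s; and
    # after max_wait (60s) with no new data, reopen() is called to detect rotation.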
def wait(self):
if self.wait_count == 0:
self.pos = self.fh.tell()
self.stat = os.fstat(self.fh.fileno())
self.wait_count += 1
elapsed = time() - self.last_time
# if we've waited long enough try to reopen the file, if that returns
# true then we're done here and we do not sleep.
if elapsed >= self.max_wait:
self.last_time = time()
if self.reopen():
return
# determine delay value. Delay is longer based on total time passed
# note: currently last_count is always 1.
if self.last_count:
#delay = (time() - self.last_time) / self.last_count
delay = elapsed
else:
delay = self.interval
# don't delay too long
if delay > self.max_interval:
delay = self.max_interval
#elif delay < self.min_interval:
# delay = self.min_interval
#print("delay={:0.06f} elapsed={:0.06f}".format(delay, elapsed))
sleep(delay)
# end of FileTail class
def main():
print("No tests implemented.")
if __name__ == "__main__":
sys.exit(main())