Subversion Repositories configs

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4 - 1
#
2
# Sample Webalizer configuration file
3
# Copyright 1997-2000 by Bradford L. Barrett (brad@mrunix.net)
4
#
5
# Distributed under the GNU General Public License.  See the
6
# files "Copyright" and "COPYING" provided with the webalizer
7
# distribution for additional information.
8
#
9
# This is a sample configuration file for the Webalizer (ver 2.01)
10
# Lines starting with pound signs '#' are comment lines and are
11
# ignored.  Blank lines are skipped as well.  Other lines are considered
12
# as configuration lines, and have the form "ConfigOption  Value" where
13
# ConfigOption is a valid configuration keyword, and Value is the value
14
# to assign that configuration option.  Invalid keyword/values are
15
# ignored, with appropriate warnings being displayed.  There must be
16
# at least one space or tab between the keyword and its value.
17
#
18
# As of version 0.98, The Webalizer will look for a 'default' configuration
19
# file named "webalizer.conf" in the current directory, and if not found
20
# there, will look for "/etc/webalizer.conf".
21
 
22
 
23
# LogFile defines the web server log file to use.  If not specified
24
# here or on the command line, input will default to STDIN.  If
25
# the log filename ends in '.gz' (ie: a gzip compressed file), it will
26
# be decompressed on the fly as it is being read.
27
 
28
LogFile        /var/log/httpd/access_log
29
 
30
# LogType defines the log type being processed.  Normally, the Webalizer
31
# expects a CLF or Combined web server log as input.  Using this option,
32
# you can process ftp logs as well (xferlog as produced by wu-ftp and
33
# others), or Squid native logs.  Values can be 'clf', 'ftp' or 'squid',
34
# with 'clf' the default.
35
 
36
#LogType	clf
37
 
38
# OutputDir is where you want to put the output files.  This should
39
# be a full path name, however relative ones might work as well.
40
# If no output directory is specified, the current directory will be used.
41
 
42
OutputDir      /var/www/usage
43
 
44
# HistoryName allows you to specify the name of the history file produced
45
# by the Webalizer.  The history file keeps the data for up to 12 months
46
# worth of logs, used for generating the main HTML page (index.html).
47
# The default is a file named "webalizer.hist", stored in the specified
48
# output directory.  If you specify just the filename (without a path),
49
# it will be kept in the specified output directory.  Otherwise, the path
50
# is relative to the output directory, unless absolute (leading /).
51
 
52
HistoryName	/var/lib/webalizer/webalizer.hist
53
 
54
# Incremental processing allows multiple partial log files to be used
55
# instead of one huge one.  Useful for large sites that have to rotate
56
# their log files more than once a month.  The Webalizer will save its
57
# internal state before exiting, and restore it the next time run, in
58
# order to continue processing where it left off.  This mode also causes
59
# The Webalizer to scan for and ignore duplicate records (records already
60
# processed by a previous run).  See the README file for additional
61
# information.  The value may be 'yes' or 'no', with a default of 'no'.
62
# The file 'webalizer.current' is used to store the current state data,
63
# and is located in the output directory of the program (unless changed
64
# with the IncrementalName option below).  Please read at least the section
65
# on Incremental processing in the README file before you enable this option.
66
 
67
Incremental	yes
68
 
69
# IncrementalName allows you to specify the filename for saving the
70
# incremental data in.  It is similar to the HistoryName option where the
71
# name is relative to the specified output directory, unless an absolute
72
# filename is specified.  The default is a file named "webalizer.current"
73
# kept in the normal output directory.  If you don't specify "Incremental"
74
# as 'yes' then this option has no meaning.
75
 
76
IncrementalName	/var/lib/webalizer/webalizer.current
77
 
78
# ReportTitle is the text to display as the title.  The hostname
79
# (unless blank) is appended to the end of this string (separated with
80
# a space) to generate the final full title string.
81
# Default is (for english) "Usage Statistics for".
82
 
83
#ReportTitle    Usage Statistics for
84
 
85
# HostName defines the hostname for the report.  This is used in
86
# the title, and is prepended to the URL table items.  This allows
87
# clicking on URL's in the report to go to the proper location in
88
# the event you are running the report on a 'virtual' web server,
89
# or for a server different than the one the report resides on.
90
# If not specified here, or on the command line, webalizer will
91
# try to get the hostname via a uname system call.  If that fails,
92
# it will default to "localhost".
93
 
94
#HostName       localhost
95
 
96
# HTMLExtension allows you to specify the filename extension to use
97
# for generated HTML pages.  Normally, this defaults to "html", but
98
# can be changed for sites who need it (like for PHP embedded pages).
99
 
100
#HTMLExtension  html
101
 
102
# PageType lets you tell the Webalizer what types of URL's you
103
# consider a 'page'.  Most people consider html and cgi documents
104
# as pages, while not images and audio files.  If no types are
105
# specified, defaults will be used ('htm*', 'cgi' and HTMLExtension
106
# if different for web logs, 'txt' for ftp logs).
107
 
108
PageType	htm*
109
PageType	cgi
110
PageType        php
111
PageType        shtml
112
#PageType	phtml
113
#PageType	php3
114
#PageType	pl
115
 
116
# UseHTTPS should be used if the analysis is being run on a
117
# secure server, and links to urls should use 'https://' instead
118
# of the default 'http://'.  If you need this, set it to 'yes'.
119
# Default is 'no'.  This only changes the behaviour of the 'Top
120
# URL's' table.
121
 
122
#UseHTTPS       no
123
 
124
# DNSCache specifies the DNS cache filename to use for reverse DNS lookups.
125
# This file must be specified if you wish to perform name lookups on any IP
126
# addresses found in the log file.  If an absolute path is not given as
127
# part of the filename (ie: starts with a leading '/'), then the name is
128
# relative to the default output directory.  See the DNS.README file for
129
# additional information.
130
 
131
DNSCache	/var/lib/webalizer/dns_cache.db
132
 
133
# DNSChildren allows you to specify how many "children" processes are
134
# run to perform DNS lookups to create or update the DNS cache file.
135
# If a number is specified, the DNS cache file will be created/updated
136
# each time the Webalizer is run, immediately prior to normal processing,
137
# by running the specified number of "children" processes to perform
138
# DNS lookups.  If used, the DNS cache filename MUST be specified as
139
# well.  The default value is zero (0), which disables DNS cache file
140
# creation/updates at run time.  The number of children processes to
141
# run may be anywhere from 1 to 100, however a large number may affect
142
# normal system operations.  Reasonable values should be between 5 and
143
# 20.  See the DNS.README file for additional information.
144
 
145
DNSChildren	10
146
 
147
# HTMLPre defines HTML code to insert at the very beginning of the
148
# file.  Default is the DOCTYPE line shown below.  Max line length
149
# is 80 characters, so use multiple HTMLPre lines if you need more.
150
 
151
#HTMLPre <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
152
 
153
# HTMLHead defines HTML code to insert within the <HEAD></HEAD>
154
# block, immediately after the <TITLE> line.  Maximum line length
155
# is 80 characters, so use multiple lines if needed.
156
 
157
#HTMLHead <META NAME="author" CONTENT="The Webalizer">
158
 
159
# HTMLBody defines the HTML code to be inserted, starting with the
160
# <BODY> tag.  If not specified, the default is shown below.  If
161
# used, you MUST include your own <BODY> tag as the first line.
162
# Maximum line length is 80 char, use multiple lines if needed.
163
 
164
#HTMLBody <BODY BGCOLOR="#E8E8E8" TEXT="#000000" LINK="#0000FF" VLINK="#FF0000">
165
 
166
# HTMLPost defines the HTML code to insert immediately before the
167
# first <HR> on the document, which is just after the title and
168
# "summary period"-"Generated on:" lines.  If anything, this should
169
# be used to clean up in case an image was inserted with HTMLBody.
170
# As with HTMLHead, you can define as many of these as you want and
171
# they will be inserted in the output stream in order of appearance.
172
# Max string size is 80 characters.  Use multiple lines if you need to.
173
 
174
#HTMLPost 	<BR CLEAR="all">
175
 
176
# HTMLTail defines the HTML code to insert at the bottom of each
177
# HTML document, usually to include a link back to your home
178
# page or insert a small graphic.  It is inserted as a table
179
# data element (ie: <TD> your code here </TD>) and is right
180
# aligned with the page.  Max string size is 80 characters.
181
 
182
#HTMLTail <IMG SRC="msfree.png" ALT="100% Micro$oft free!">
183
 
184
# HTMLEnd defines the HTML code to add at the very end of the
185
# generated files.  It defaults to what is shown below.  If
186
# used, you MUST specify the </BODY> and </HTML> closing tags
187
# as the last lines.  Max string length is 80 characters.
188
 
189
#HTMLEnd </BODY></HTML>
190
 
191
# The Quiet option suppresses output messages... Useful when run
192
# as a cron job to prevent bogus e-mails.  Values can be either
193
# "yes" or "no".  Default is "no".  Note: this does not suppress
194
# warnings and errors (which are printed to stderr).
195
 
196
Quiet		yes
197
 
198
# ReallyQuiet will suppress all messages including errors and
199
# warnings.  Values can be 'yes' or 'no' with 'no' being the
200
# default.  If 'yes' is used here, it cannot be overridden from
201
# the command line, so use with caution.  A value of 'no' has
202
# no effect.
203
 
204
#ReallyQuiet	no
205
 
206
# TimeMe allows you to force the display of timing information
207
# at the end of processing.  A value of 'yes' will force the
208
# timing information to be displayed.  A value of 'no' has no
209
# effect.
210
 
211
#TimeMe		no
212
 
213
# GMTTime allows reports to show GMT (UTC) time instead of local
214
# time.  Default is to display the time the report was generated
215
# in the timezone of the local machine, such as EDT or PST.  This
216
# keyword allows you to have times displayed in UTC instead.  Use
217
# only if you really have a good reason, since it will probably
218
# screw up the reporting periods by however many hours your local
219
# time zone is off of GMT.
220
 
221
#GMTTime		no
222
 
223
# Debug prints additional information for error messages.  This
224
# will cause webalizer to dump bad records/fields instead of just
225
# telling you it found a bad one.   As usual, the value can be
226
# either "yes" or "no".  The default is "no".  It shouldn't be
227
# needed unless you start getting a lot of Warning or Error
228
# messages and want to see why.  (Note: warning and error messages
229
# are printed to stderr, not stdout like normal messages).
230
 
231
#Debug		no
232
 
233
# FoldSeqErr forces the Webalizer to ignore sequence errors.
234
# The Apache HTTP server may generate out-of-sequence log entries
235
# so this option is enabled.
236
 
237
FoldSeqErr	yes
238
 
239
# VisitTimeout allows you to set the default timeout for a visit
240
# (sometimes called a 'session').  The default is 30 minutes,
241
# which should be fine for most sites.
242
# Visits are determined by looking at the time of the current
243
# request, and the time of the last request from the site.  If
244
# the time difference is greater than the VisitTimeout value, it
245
# is considered a new visit, and visit totals are incremented.
246
# Value is the number of seconds to timeout (default=1800=30min)
247
 
248
#VisitTimeout	1800
249
 
250
# IgnoreHist shouldn't be used in a config file, but it is here
251
# just because it might be useful in certain situations.  If the
252
# history file is ignored, the main "index.html" file will only
253
# report on the current log file's contents.  Useful only when you
254
# want to reproduce the reports from scratch.  USE WITH CAUTION!
255
# Valid values are "yes" or "no".  Default is "no".
256
 
257
#IgnoreHist	no
258
 
259
# Country Graph allows the usage by country graph to be disabled.
260
# Values can be 'yes' or 'no', default is 'yes'.
261
 
262
#CountryGraph	yes
263
 
264
# DailyGraph and DailyStats allows the daily statistics graph
265
# and statistics table to be disabled (not displayed).  Values
266
# may be "yes" or "no". Default is "yes".
267
 
268
#DailyGraph	yes
269
#DailyStats	yes
270
 
271
# HourlyGraph and HourlyStats allows the hourly statistics graph
272
# and statistics table to be disabled (not displayed).  Values
273
# may be "yes" or "no". Default is "yes".
274
 
275
#HourlyGraph	yes
276
#HourlyStats	yes
277
 
278
# GraphLegend allows the color coded legends to be turned on or off
279
# in the graphs.  The default is for them to be displayed.  This only
280
# toggles the color coded legends, the other legends are not changed.
281
# If you think they are hideous and ugly, say 'no' here :)
282
 
283
#GraphLegend	yes
284
 
285
# GraphLines allows you to have index lines drawn behind the graphs.
286
# I personally am not crazy about them, but a lot of people requested
287
# them and they weren't a big deal to add.  The number represents the
288
# number of lines you want displayed.  Default is 2, you can disable
289
# the lines by using a value of zero ('0').  [max is 20]
290
# Note, due to rounding errors, some values don't work quite right.
291
# The lower the better, with 1,2,3,4,6 and 10 producing nice results.
292
 
293
#GraphLines	2
294
 
295
# The "Top" options below define the number of entries for each table.
296
# Defaults are Sites=30, URL's=30, Referrers=30 and Agents=15, and
297
# Countries=30. TopKSites and TopKURLs (by KByte tables) both default
298
# to 10, as do the top entry/exit tables (TopEntry/TopExit).  The top
299
# search strings and usernames default to 20.  Tables may be disabled
300
# by using zero (0) for the value.
301
 
302
#TopSites        30
303
#TopKSites       10
304
#TopURLs         30
305
#TopKURLs        10
306
#TopReferrers    30
307
#TopAgents       15
308
#TopCountries    30
309
#TopEntry        10
310
#TopExit         10
311
#TopSearch       20
312
#TopUsers        20
313
 
314
# The All* keywords allow the display of all URL's, Sites, Referrers
315
# User Agents, Search Strings and Usernames.  If enabled, a separate
316
# HTML page will be created, and a link will be added to the bottom
317
# of the appropriate "Top" table.  There are a couple of conditions
318
# for this to occur..  First, there must be more items than will fit
319
# in the "Top" table (otherwise it would just be duplicating what is
320
# already displayed).  Second, the listing will only show those items
321
# that are normally visible, which means it will not show any hidden
322
# items.  Grouped entries will be listed first, followed by individual
323
# items.  The value for these keywords can be either 'yes' or 'no',
324
# with the default being 'no'.  Please be aware that these pages can
325
# be quite large in size, particularly the sites page,  and separate
326
# pages are generated for each month, which can consume quite a lot
327
# of disk space depending on the traffic to your site.
328
 
329
#AllSites	no
330
#AllURLs	no
331
#AllReferrers	no
332
#AllAgents	no
333
#AllSearchStr	no
334
#AllUsers       no
335
 
336
# The Webalizer normally strips the string 'index.' off the end of
337
# URL's in order to consolidate URL totals.  For example, the URL
338
# /somedir/index.html is turned into /somedir/ which is really the
339
# same URL.  This option allows you to specify additional strings
340
# to treat in the same way.  You don't need to specify 'index.' as
341
# it is always scanned for by The Webalizer, this option is just to
342
# specify _additional_ strings if needed.  If you don't need any,
343
# don't specify any as each string will be scanned for in EVERY
344
# log record... A bunch of them will degrade performance.  Also,
345
# the string is scanned for anywhere in the URL, so a string of
346
# 'home' would turn the URL /somedir/homepages/brad/home.html into
347
# just /somedir/ which is probably not what was intended.
348
 
349
#IndexAlias     home.htm
350
#IndexAlias	homepage.htm
351
 
352
# The Hide*, Group* and Ignore* and Include* keywords allow you to
353
# change the way Sites, URL's, Referrers, User Agents and Usernames
354
# are manipulated.  The Ignore* keywords will cause The Webalizer to
355
# completely ignore records as if they didn't exist (and thus not
356
# counted in the main site totals).  The Hide* keywords will prevent
357
# things from being displayed in the 'Top' tables, but will still be
358
# counted in the main totals.  The Group* keywords allow grouping
359
# similar objects as if they were one.  Grouped records are displayed
360
# in the 'Top' tables and can optionally be displayed in BOLD and/or
361
# shaded. Groups cannot be hidden, and are not counted in the main
362
# totals. The Group* options do not, by default, hide all the items
363
# that it matches.  If you want to hide the records that match (so just
364
# the grouping record is displayed), follow with an identical Hide*
365
# keyword with the same value.  (see example below)  In addition,
366
# Group* keywords may have an optional label which will be displayed
367
# instead of the keyword's value.  The label should be separated from
368
# the value by at least one 'white-space' character, such as a space
369
# or tab.
370
#
371
# The value can have either a leading or trailing '*' wildcard
372
# character.  If no wildcard is found, a match can occur anywhere
373
# in the string. Given a string "www.yourmama.com", the values "your",
374
# "*mama.com" and "www.your*" will all match.
375
 
376
# Your own site should be hidden
377
#HideSite	*mrunix.net
378
#HideSite	localhost
379
 
380
# Your own site gives most referrals
381
#HideReferrer	mrunix.net/
382
 
383
# This one hides non-referrers ("-" Direct requests)
384
#HideReferrer	Direct Request
385
 
386
# Usually you want to hide these
387
HideURL		*.gif
388
HideURL		*.GIF
389
HideURL		*.jpg
390
HideURL		*.JPG
391
HideURL		*.png
392
HideURL		*.PNG
393
HideURL		*.ra
394
 
395
# Hiding agents is kind of futile
396
#HideAgent	RealPlayer
397
 
398
# You can also hide based on authenticated username
399
#HideUser	root
400
#HideUser	admin
401
 
402
# Grouping options
403
#GroupURL	/cgi-bin/*	CGI Scripts
404
#GroupURL	/images/*	Images
405
 
406
#GroupSite	*.aol.com
407
#GroupSite	*.compuserve.com
408
 
409
#GroupReferrer	yahoo.com/	Yahoo!
410
#GroupReferrer	excite.com/     Excite
411
#GroupReferrer	infoseek.com/   InfoSeek
412
#GroupReferrer	webcrawler.com/ WebCrawler
413
 
414
#GroupUser      root            Admin users
415
#GroupUser      admin           Admin users
416
#GroupUser      wheel           Admin users
417
 
418
# The following is a great way to get an overall total
419
# for browsers, and not display all the detail records.
420
# (You should use MangleAgent to refine further...)
421
 
422
#GroupAgent	MSIE		Micro$oft Internet Exploder
423
#HideAgent	MSIE
424
#GroupAgent	Mozilla		Netscape
425
#HideAgent	Mozilla
426
#GroupAgent	Lynx*		Lynx
427
#HideAgent	Lynx*
428
 
429
# HideAllSites allows forcing individual sites to be hidden in the
430
# report.  This is particularly useful when used in conjunction
431
# with the "GroupDomain" feature, but could be useful in other
432
# situations as well, such as when you only want to display grouped
433
# sites (with the GroupSite keywords...).  The value for this
434
# keyword can be either 'yes' or 'no', with 'no' the default,
435
# allowing individual sites to be displayed.
436
 
437
#HideAllSites	no
438
 
439
# The GroupDomains keyword allows you to group individual hostnames
440
# into their respective domains.  The value specifies the level of
441
# grouping to perform, and can be thought of as 'the number of dots'
442
# that will be displayed.  For example, if a visiting host is named
443
# cust1.tnt.mia.uu.net, a domain grouping of 1 will result in just
444
# "uu.net" being displayed, while a 2 will result in "mia.uu.net".
445
# The default value of zero disables this feature.  Domains will only
446
# be grouped if they do not match any existing "GroupSite" records,
447
# which allows overriding this feature with your own if desired.
448
 
449
#GroupDomains	0
450
 
451
# The GroupShading allows grouped rows to be shaded in the report.
452
# Useful if you have lots of groups and individual records that
453
# intermingle in the report, and you want to differentiate the group
454
# records a little more.  Value can be 'yes' or 'no', with 'yes'
455
# being the default.
456
 
457
#GroupShading	yes
458
 
459
# GroupHighlight allows the group record to be displayed in BOLD.
460
# Can be either 'yes' or 'no' with the default 'yes'.
461
 
462
#GroupHighlight	yes
463
 
464
# The Ignore* keywords allow you to completely ignore log records based
465
# on hostname, URL, user agent, referrer or username.  I hesitated in
466
# adding these, since the Webalizer was designed to generate _accurate_
467
# statistics about a web server's performance.  By choosing to ignore
468
# records, the accuracy of reports becomes skewed, negating why I wrote
469
# this program in the first place.  However, due to popular demand, here
470
# they are.  Use the same as the Hide* keywords, where the value can have
471
# a leading or trailing wildcard '*'.  Use at your own risk ;)
472
 
473
#IgnoreSite	bad.site.net
474
#IgnoreURL	/test*
475
#IgnoreReferrer	file:/*
476
#IgnoreAgent	RealPlayer
477
#IgnoreUser     root
478
 
479
# The Include* keywords allow you to force the inclusion of log records
480
# based on hostname, URL, user agent, referrer or username.  They take
481
# precedence over the Ignore* keywords.  Note: Using Ignore/Include
482
# combinations to selectively process parts of a web site is _extremely
483
# inefficient_!!! Avoid doing so if possible (ie: grep the records to a
484
# separate file if you really want that kind of report).
485
 
486
# Example: Only show stats on Joe User's pages...
487
#IgnoreURL	*
488
#IncludeURL	~joeuser*
489
 
490
# Or based on an authenticated username
491
#IgnoreUser     *
492
#IncludeUser    someuser
493
 
494
# The MangleAgents allows you to specify how much, if any, The Webalizer
495
# should mangle user agent names.  This allows several levels of detail
496
# to be produced when reporting user agent statistics.  There are six
497
# levels that can be specified, which define different levels of detail
498
# suppression.  Level 5 shows only the browser name (MSIE or Mozilla)
499
# and the major version number.  Level 4 adds the minor version number
500
# (single decimal place).  Level 3 displays the minor version to two
501
# decimal places.  Level 2 will add any sub-level designation (such
502
# as Mozilla/3.01Gold or MSIE 3.0b).  Level 1 will attempt to also add
503
# the system type if it is specified.  The default Level 0 displays the
504
# full user agent field without modification and produces the greatest
505
# amount of detail.  User agent names that can't be mangled will be
506
# left unmodified.
507
 
508
#MangleAgents    0
509
 
510
# The SearchEngine keywords allow specification of search engines and
511
# their query strings on the URL.  These are used to locate and report
512
# what search strings are used to find your site.  The first word is
513
# a substring to match in the referrer field that identifies the search
514
# engine, and the second is the URL variable used by that search engine
515
# to define its search terms.
516
 
517
SearchEngine	yahoo.com	p=
518
SearchEngine	altavista.com	q=
519
SearchEngine	google.com	q=
520
SearchEngine	eureka.com	q=
521
SearchEngine	lycos.com	query=
522
SearchEngine	hotbot.com	MT=
523
SearchEngine	msn.com		MT=
524
SearchEngine	infoseek.com	qt=
525
SearchEngine	webcrawler	searchText=
526
SearchEngine	excite		search=
527
SearchEngine	netscape.com	search=
528
SearchEngine	mamma.com	query=
529
SearchEngine	alltheweb.com	query=
530
SearchEngine	northernlight.com  qr=
531
 
532
# The Dump* keywords allow the dumping of Sites, URL's, Referrers
533
# User Agents, Usernames and Search strings to separate tab delimited
534
# text files, suitable for import into most database or spreadsheet
535
# programs.
536
 
537
# DumpPath specifies the path to dump the files.  If not specified,
538
# it will default to the current output directory.  Do not use a
539
# trailing slash ('/').
540
 
541
#DumpPath	/var/log/httpd
542
 
543
# The DumpHeader keyword specifies if a header record should be
544
# written to the file.  A header record is the first record of the
545
# file, and contains the labels for each field written.  Normally,
546
# files that are intended to be imported into a database system
547
# will not need a header record, while spreadsheets usually do.
548
# Value can be either 'yes' or 'no', with 'no' being the default.
549
 
550
#DumpHeader	no
551
 
552
# DumpExtension allows you to specify the dump filename extension
553
# to use.  The default is "tab", but some programs are picky about
554
# the filenames they use, so you may change it here (for example,
555
# some people may prefer to use "csv").
556
 
557
#DumpExtension	tab
558
 
559
# These control the dumping of each individual table.  The value
560
# can be either 'yes' or 'no'.. the default is 'no'.
561
 
562
#DumpSites	no
563
#DumpURLs	no
564
#DumpReferrers	no
565
#DumpAgents	no
566
#DumpUsers	no
567
#DumpSearchStr  no
568
 
569
# End of configuration file...  Have a nice day!