/etc/nginx/sites-available/default
server {
    # Default virtual host for plain HTTP on IPv4 and IPv6.
    listen 80 default_server;
    listen [::]:80 default_server;
    server_name _;

    # Document root; doku.php is used as the directory index.
    root /var/www/public_html;
    index doku.php;

    # Largest accepted request body and the buffer size used to read it.
    client_max_body_size 20M;
    client_body_buffer_size 128K;

    # Reject requests whose User-Agent contains "Bytespider" or "bytedance"
    # (the "~" operator is a case-sensitive regex match).
    if ($http_user_agent ~ (Bytespider|bytedance) ) {
        return 403;
    }

    # Serve an existing file or directory; otherwise hand the request to
    # the rewrite rules in @dokuwiki.
    location / {
        try_files $uri $uri/ @dokuwiki;
    }

    # Never serve the data, conf, bin and inc directories or the installer.
    # Comment this block out while running the installation.
    location ~ /(data/|conf/|bin/|inc/|install.php) {
        return 403;
    }

    # Never serve .ht* files.
    location ~ /\.ht {
        deny all;
    }

    # Map the short URL forms onto the corresponding PHP entry points.
    location @dokuwiki {
        rewrite ^/_media/(.*) /lib/exe/fetch.php?media=$1 last;
        rewrite ^/_detail/(.*) /lib/exe/detail.php?media=$1 last;
        rewrite ^/_export/([^/]+)/(.*) /doku.php?do=export_$1&id=$2 last;
        rewrite ^/(.*) /doku.php?id=$1 last;
    }

    # Hand PHP scripts to the PHP-FPM socket; a script that does not exist
    # on disk yields 404 instead of being passed to FastCGI.
    location ~ \.php$ {
        try_files $uri =404;
        fastcgi_pass unix:/var/run/php/php8.2-fpm.sock;
        fastcgi_index index.php;
        include fastcgi_params;
        fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
    }
}
/usr/local/bin/cleanup_dokuwiki_cache.sh
#!/bin/bash
#######################################
# Remove stale lock files and expired cache files from a wiki data directory.
# Arguments:
#   $1 - full path to the data directory of the wiki (must be non-empty)
#   $2 - number of days after which cache files are removed
#        (must be a non-negative integer)
# Outputs:
#   Writes a diagnostic to stderr when an argument is rejected.
# Returns:
#   1 if an argument is rejected, otherwise the exit status of the last find.
#######################################
cleanup() {
  local data_path="${1-}"
  local retention_days="${2-}"

  # An empty path would make the find calls below operate on /locks/ and
  # /cache/ at the filesystem root, and both run with -delete.
  if [[ -z "${data_path}" ]]; then
    printf 'cleanup: data path must not be empty\n' >&2
    return 1
  fi

  # Only plain digits may reach find's -mtime argument.
  if [[ ! "${retention_days}" =~ ^[0-9]+$ ]]; then
    printf 'cleanup: retention must be a non-negative integer, got "%s"\n' \
      "${retention_days}" >&2
    return 1
  fi

  # Remove stale lock files. find counts age in whole 24-hour periods, so
  # "-mtime +1" matches files last modified at least two days ago.
  find "${data_path}/locks/" -name '*.lock' -type f -mtime +1 -delete

  # Remove cache files older than the retention period.
  find "${data_path}/cache/" -type f -mtime "+${retention_days}" -delete
}
# Run the cleanup once per DokuWiki installation.
# Arguments: <path to data directory> <retention in days>
cleanup '/var/www/data' '180'
Quelle: https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
/var/www/public_html/robots.txt
# Each group below disallows the whole site for one crawler.
# Every directive must be on its own line to be parsed.

User-agent: CCBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: ImagesiftBot
Disallow: /
/etc/borgmatic/config.yaml
...
location:
    # List of source directories to back up (required). Globs and
    # tildes are expanded.
    source_directories:
        - /etc
        - /home
        - /root
        - /usr/local
        - /var/log
        - /var/www
...