new selfhosted version

This commit is contained in:
antonio
2025-11-28 14:11:51 +01:00
parent afda29997d
commit 951860f67e
1046 changed files with 72586 additions and 574750 deletions

4
producer/.dockerignore Normal file
View File

@@ -0,0 +1,4 @@
node_modules
ecosystem.config.js
dist
.gitignore

2
producer/.gitignore vendored
View File

@@ -1,6 +1,4 @@
node_modules
static
ecosystem.config.cjs
ecosystem.config.js
dist
start_dev.js

View File

@@ -1,17 +1,17 @@
FROM node:21-alpine as base
FROM node:22-alpine as builder
RUN npm i -g pnpm
WORKDIR /home/app
COPY --link ./producer/package.json ./producer/pnpm-lock.yaml ./producer/
WORKDIR /home/app/producer
COPY ./package*.json .
RUN pnpm install
COPY . .
RUN pnpm run compile-only
COPY --link ../producer ./
FROM node:22-alpine
WORKDIR /home/app
RUN npm i -g pnpm
COPY package*.json ./
RUN pnpm install
COPY --from=builder /home/app/dist .
RUN pnpm run build
CMD ["node", "/home/app/producer/dist/index.js"]
CMD ["node", "/home/app/index.js"]

View File

@@ -2,6 +2,7 @@
"dependencies": {
"cors": "^2.8.5",
"express": "^4.19.2",
"mongoose": "^8.12.1",
"redis": "^4.7.0"
},
"devDependencies": {
@@ -16,12 +17,11 @@
"main": "dist/index.js",
"scripts": {
"dev": "node scripts/start_dev.js",
"compile": "tsc",
"compile": "pnpm run shared && tsc",
"compile-only": "tsc",
"build": "npm run compile",
"docker-build": "docker build -t litlyx-producer -f Dockerfile ../",
"docker-inspect": "docker run -it litlyx-producer sh",
"workspace:shared": "ts-node ../scripts/producer/shared.ts",
"workspace:deploy": "ts-node ../scripts/producer/deploy.ts"
"shared": "ts-node ../scripts/producer/shared.ts",
"deploy": "ts-node ../scripts/producer/deploy.ts"
},
"keywords": [],
"author": "Emily",

173
producer/pnpm-lock.yaml generated
View File

@@ -14,6 +14,9 @@ importers:
express:
specifier: ^4.19.2
version: 4.19.2
mongoose:
specifier: ^8.12.1
version: 8.12.1
redis:
specifier: ^4.7.0
version: 4.7.0
@@ -50,6 +53,9 @@ packages:
'@jridgewell/trace-mapping@0.3.9':
resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==}
'@mongodb-js/saslprep@1.2.0':
resolution: {integrity: sha512-+ywrb0AqkfaYuhHs6LxKWgqbh3I72EpEgESCw37o+9qPx9WTCkgDm2B+eMrwehGtHBWHFU4GXvnSCNiFhhausg==}
'@redis/bloom@1.2.0':
resolution: {integrity: sha512-HG2DFjYKbpNmVXsa0keLHp/3leGJz1mjh09f2RLGGLQZzSHpkmZWuwJbAvo3QcRY8p80m5+ZdXZdYOSBLlp7Cg==}
peerDependencies:
@@ -127,6 +133,12 @@ packages:
'@types/serve-static@1.15.7':
resolution: {integrity: sha512-W8Ym+h8nhuRwaKPaDw34QUkwsGi6Rc4yYqvKFo5rm2FUEhCFbzVWrxXUxuKK8TASjWsysJY0nsmNCGhCOIsrOw==}
'@types/webidl-conversions@7.0.3':
resolution: {integrity: sha512-CiJJvcRtIgzadHCYXw7dqEnMNRjhGZlYK05Mj9OyktqV8uVT8fD2BFOB7S1uwBE3Kj2Z+4UyPmFw/Ixgw/LAlA==}
'@types/whatwg-url@11.0.5':
resolution: {integrity: sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==}
accepts@1.3.8:
resolution: {integrity: sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==}
engines: {node: '>= 0.6'}
@@ -150,6 +162,10 @@ packages:
resolution: {integrity: sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==}
engines: {node: '>= 0.8', npm: 1.2.8000 || >= 1.4.16}
bson@6.10.3:
resolution: {integrity: sha512-MTxGsqgYTwfshYWTRdmZRC+M7FnG1b4y7RO7p2k3X24Wq0yv1m77Wsj0BzlPzd/IowgESfsruQCUToa7vbOpPQ==}
engines: {node: '>=16.20.1'}
bytes@3.1.2:
resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==}
engines: {node: '>= 0.8'}
@@ -192,6 +208,15 @@ packages:
supports-color:
optional: true
debug@4.4.0:
resolution: {integrity: sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==}
engines: {node: '>=6.0'}
peerDependencies:
supports-color: '*'
peerDependenciesMeta:
supports-color:
optional: true
define-data-property@1.1.4:
resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==}
engines: {node: '>= 0.4'}
@@ -290,6 +315,10 @@ packages:
resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==}
engines: {node: '>= 0.10'}
kareem@2.6.3:
resolution: {integrity: sha512-C3iHfuGUXK2u8/ipq9LfjFfXFxAZMQJJq7vLS45r3D9Y2xQ/m4S8zaR4zMLFWh9AsNPXmcFfUDhTEO8UIC/V6Q==}
engines: {node: '>=12.0.0'}
make-error@1.3.6:
resolution: {integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==}
@@ -297,6 +326,9 @@ packages:
resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==}
engines: {node: '>= 0.6'}
memory-pager@1.5.0:
resolution: {integrity: sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==}
merge-descriptors@1.0.1:
resolution: {integrity: sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==}
@@ -317,6 +349,48 @@ packages:
engines: {node: '>=4'}
hasBin: true
mongodb-connection-string-url@3.0.2:
resolution: {integrity: sha512-rMO7CGo/9BFwyZABcKAWL8UJwH/Kc2x0g72uhDWzG48URRax5TCIcJ7Rc3RZqffZzO/Gwff/jyKwCU9TN8gehA==}
mongodb@6.14.2:
resolution: {integrity: sha512-kMEHNo0F3P6QKDq17zcDuPeaywK/YaJVCEQRzPF3TOM/Bl9MFg64YE5Tu7ifj37qZJMhwU1tl2Ioivws5gRG5Q==}
engines: {node: '>=16.20.1'}
peerDependencies:
'@aws-sdk/credential-providers': ^3.188.0
'@mongodb-js/zstd': ^1.1.0 || ^2.0.0
gcp-metadata: ^5.2.0
kerberos: ^2.0.1
mongodb-client-encryption: '>=6.0.0 <7'
snappy: ^7.2.2
socks: ^2.7.1
peerDependenciesMeta:
'@aws-sdk/credential-providers':
optional: true
'@mongodb-js/zstd':
optional: true
gcp-metadata:
optional: true
kerberos:
optional: true
mongodb-client-encryption:
optional: true
snappy:
optional: true
socks:
optional: true
mongoose@8.12.1:
resolution: {integrity: sha512-UW22y8QFVYmrb36hm8cGncfn4ARc/XsYWQwRTaj0gxtQk1rDuhzDO1eBantS+hTTatfAIS96LlRCJrcNHvW5+Q==}
engines: {node: '>=16.20.1'}
mpath@0.9.0:
resolution: {integrity: sha512-ikJRQTk8hw5DEoFVxHG1Gn9T/xcjtdnOKIU1JTmGjZZlg9LST2mBLmcX3/ICIbgJydT2GOc15RnNy5mHmzfSew==}
engines: {node: '>=4.0.0'}
mquery@5.0.0:
resolution: {integrity: sha512-iQMncpmEK8R8ncT8HJGsGc9Dsp8xcgYMVSbs5jgnm1lFHTZqMJTUWTDx1LBO8+mK3tPNZWFLBghQEIOULSTHZg==}
engines: {node: '>=14.0.0'}
ms@2.0.0:
resolution: {integrity: sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==}
@@ -349,6 +423,10 @@ packages:
resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==}
engines: {node: '>= 0.10'}
punycode@2.3.1:
resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
engines: {node: '>=6'}
qs@6.11.0:
resolution: {integrity: sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==}
engines: {node: '>=0.6'}
@@ -389,6 +467,12 @@ packages:
resolution: {integrity: sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==}
engines: {node: '>= 0.4'}
sift@17.1.3:
resolution: {integrity: sha512-Rtlj66/b0ICeFzYTuNvX/EF1igRbbnGSvEyT79McoZa/DeGhMyC5pWKOEsZKnpkqtSeovd5FL/bjHWC3CIIvCQ==}
sparse-bitfield@3.0.3:
resolution: {integrity: sha512-kvzhi7vqKTfkh0PZU+2D2PIllw2ymqJKujUcyPMd9Y75Nv4nPbGJZXNhxsgdQab2BmlDct1YnfQCguEvHr7VsQ==}
statuses@2.0.1:
resolution: {integrity: sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==}
engines: {node: '>= 0.8'}
@@ -397,6 +481,10 @@ packages:
resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==}
engines: {node: '>=0.6'}
tr46@5.1.0:
resolution: {integrity: sha512-IUWnUK7ADYR5Sl1fZlO1INDUhVhatWl7BtJWsIhwJ0UAK7ilzzIa8uIqOO/aYVWHZPJkKbEL+362wrzoeRF7bw==}
engines: {node: '>=18'}
ts-node@10.9.2:
resolution: {integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==}
hasBin: true
@@ -438,6 +526,14 @@ packages:
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
engines: {node: '>= 0.8'}
webidl-conversions@7.0.0:
resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==}
engines: {node: '>=12'}
whatwg-url@14.2.0:
resolution: {integrity: sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==}
engines: {node: '>=18'}
yallist@4.0.0:
resolution: {integrity: sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==}
@@ -460,6 +556,10 @@ snapshots:
'@jridgewell/resolve-uri': 3.1.2
'@jridgewell/sourcemap-codec': 1.4.15
'@mongodb-js/saslprep@1.2.0':
dependencies:
sparse-bitfield: 3.0.3
'@redis/bloom@1.2.0(@redis/client@1.6.0)':
dependencies:
'@redis/client': 1.6.0
@@ -544,6 +644,12 @@ snapshots:
'@types/node': 20.14.2
'@types/send': 0.17.4
'@types/webidl-conversions@7.0.3': {}
'@types/whatwg-url@11.0.5':
dependencies:
'@types/webidl-conversions': 7.0.3
accepts@1.3.8:
dependencies:
mime-types: 2.1.35
@@ -576,6 +682,8 @@ snapshots:
transitivePeerDependencies:
- supports-color
bson@6.10.3: {}
bytes@3.1.2: {}
call-bind@1.0.7:
@@ -609,6 +717,10 @@ snapshots:
dependencies:
ms: 2.0.0
debug@4.4.0:
dependencies:
ms: 2.1.3
define-data-property@1.1.4:
dependencies:
es-define-property: 1.0.0
@@ -731,10 +843,14 @@ snapshots:
ipaddr.js@1.9.1: {}
kareem@2.6.3: {}
make-error@1.3.6: {}
media-typer@0.3.0: {}
memory-pager@1.5.0: {}
merge-descriptors@1.0.1: {}
methods@1.1.2: {}
@@ -747,6 +863,44 @@ snapshots:
mime@1.6.0: {}
mongodb-connection-string-url@3.0.2:
dependencies:
'@types/whatwg-url': 11.0.5
whatwg-url: 14.2.0
mongodb@6.14.2:
dependencies:
'@mongodb-js/saslprep': 1.2.0
bson: 6.10.3
mongodb-connection-string-url: 3.0.2
mongoose@8.12.1:
dependencies:
bson: 6.10.3
kareem: 2.6.3
mongodb: 6.14.2
mpath: 0.9.0
mquery: 5.0.0
ms: 2.1.3
sift: 17.1.3
transitivePeerDependencies:
- '@aws-sdk/credential-providers'
- '@mongodb-js/zstd'
- gcp-metadata
- kerberos
- mongodb-client-encryption
- snappy
- socks
- supports-color
mpath@0.9.0: {}
mquery@5.0.0:
dependencies:
debug: 4.4.0
transitivePeerDependencies:
- supports-color
ms@2.0.0: {}
ms@2.1.3: {}
@@ -770,6 +924,8 @@ snapshots:
forwarded: 0.2.0
ipaddr.js: 1.9.1
punycode@2.3.1: {}
qs@6.11.0:
dependencies:
side-channel: 1.0.6
@@ -841,10 +997,20 @@ snapshots:
get-intrinsic: 1.2.4
object-inspect: 1.13.1
sift@17.1.3: {}
sparse-bitfield@3.0.3:
dependencies:
memory-pager: 1.5.0
statuses@2.0.1: {}
toidentifier@1.0.1: {}
tr46@5.1.0:
dependencies:
punycode: 2.3.1
ts-node@10.9.2(@types/node@20.14.2)(typescript@5.4.5):
dependencies:
'@cspotcode/source-map-support': 0.8.1
@@ -880,6 +1046,13 @@ snapshots:
vary@1.1.2: {}
webidl-conversions@7.0.0: {}
whatwg-url@14.2.0:
dependencies:
tr46: 5.1.0
webidl-conversions: 7.0.0
yallist@4.0.0: {}
yn@3.1.1: {}

636
producer/src/controller.ts Normal file
View File

@@ -0,0 +1,636 @@
import { DomainWhitelistModel } from "./shared/schema/shields/DomainWhitelistSchema";
import { AddressBlacklistModel } from "./shared/schema/shields/AddressBlacklistSchema";
import { BotTrafficOptionModel } from "./shared/schema/shields/BotTrafficOptionSchema";
// User-agent signatures of known crawlers, link-preview fetchers, monitoring tools and HTTP libraries.
// Every entry is the *source* of a regular expression (not a plain substring): isBot() passes each
// one to `new RegExp(...)` without flags, so matching is case-sensitive and unanchored unless the
// entry itself contains `^` / `$`. Literal `.` and `/` are therefore written escaped (`\\.`, `\\/`).
const BOT_PATTERNS = [
"Googlebot\\/",
"Googlebot-Mobile",
"Googlebot-Image",
"Googlebot-News",
"Googlebot-Video",
"AdsBot-Google([^-]|$)",
"AdsBot-Google-Mobile",
"Feedfetcher-Google",
"Mediapartners-Google",
"Mediapartners \\(Googlebot\\)",
"APIs-Google",
"Google-InspectionTool",
"Storebot-Google",
"GoogleOther",
"bingbot",
"Slurp",
"[wW]get",
"LinkedInBot",
"Python-urllib",
"python-requests",
"aiohttp",
"httpx",
"libwww-perl",
"httpunit",
"Nutch",
"Go-http-client",
"phpcrawl",
"msnbot",
"jyxobot",
"FAST-WebCrawler",
"FAST Enterprise Crawler",
"BIGLOTRON",
"Teoma",
"convera",
"seekbot",
"Gigabot",
"Gigablast",
"exabot",
"ia_archiver",
"GingerCrawler",
"webmon ",
"HTTrack",
"grub\\.org",
"UsineNouvelleCrawler",
"antibot",
"netresearchserver",
"speedy",
"fluffy",
"findlink",
"msrbot",
"panscient",
"yacybot",
"AISearchBot",
"ips-agent",
"tagoobot",
"MJ12bot",
"woriobot",
"yanga",
"buzzbot",
"mlbot",
"yandex\\.com\\/bots",
"purebot",
"Linguee Bot",
"CyberPatrol",
"voilabot",
"Baiduspider",
"citeseerxbot",
"spbot",
"twengabot",
"postrank",
"Turnitin",
"scribdbot",
"page2rss",
"sitebot",
"linkdex",
"Adidxbot",
"ezooms",
"dotbot",
"Mail\\.RU_Bot",
"discobot",
"heritrix",
"findthatfile",
"europarchive\\.org",
"NerdByNature\\.Bot",
"(sistrix|SISTRIX) [cC]rawler",
"Ahrefs(Bot|SiteAudit)",
"fuelbot",
"CrunchBot",
"IndeedBot",
"mappydata",
"woobot",
"ZoominfoBot",
"PrivacyAwareBot",
"Multiviewbot",
"SWIMGBot",
"Grobbot",
"eright",
"Apercite",
"semanticbot",
"Aboundex",
"domaincrawler",
"wbsearchbot",
"summify",
"CCBot",
"edisterbot",
"SeznamBot",
"ec2linkfinder",
"gslfbot",
"aiHitBot",
"intelium_bot",
"facebookexternalhit",
"Yeti",
"RetrevoPageAnalyzer",
"lb-spider",
"Sogou",
"lssbot",
"careerbot",
"wotbox",
"wocbot",
"ichiro",
"DuckDuckBot",
"lssrocketcrawler",
"drupact",
"webcompanycrawler",
"acoonbot",
"openindexspider",
"gnam gnam spider",
"web-archive-net\\.com\\.bot",
"backlinkcrawler",
"coccoc",
"integromedb",
"content crawler spider",
"toplistbot",
"it2media-domain-crawler",
"ip-web-crawler\\.com",
"siteexplorer\\.info",
"elisabot",
"proximic",
"changedetection",
"arabot",
"WeSEE:Search",
"niki-bot",
"CrystalSemanticsBot",
"rogerbot",
"360Spider",
"psbot",
"InterfaxScanBot",
"CC Metadata Scaper",
"g00g1e\\.net",
"GrapeshotCrawler",
"urlappendbot",
"brainobot",
"fr-crawler",
"binlar",
"SimpleCrawler",
"Twitterbot",
"cXensebot",
"smtbot",
"bnf\\.fr_bot",
"A6-Indexer",
"ADmantX",
"Facebot",
"OrangeBot\\/",
"memorybot",
"AdvBot",
"MegaIndex",
"SemanticScholarBot",
"ltx71",
"nerdybot",
"xovibot",
"BUbiNG",
"Qwantify",
"archive\\.org_bot",
"Applebot",
"TweetmemeBot",
"crawler4j",
"findxbot",
"S[eE][mM]rushBot",
"yoozBot",
"lipperhey",
"Y!J",
"Domain Re-Animator Bot",
"AddThis",
"Screaming Frog SEO Spider",
"MetaURI",
"Scrapy",
"Livelap[bB]ot",
"OpenHoseBot",
"CapsuleChecker",
"collection@infegy\\.com",
"IstellaBot",
"DeuSu\\/",
"betaBot",
"Cliqzbot\\/",
"MojeekBot\\/",
"netEstate NE Crawler",
"SafeSearch microdata crawler",
"Gluten Free Crawler\\/",
"Sonic",
"Sysomos",
"Trove",
"deadlinkchecker",
"Slack-ImgProxy",
"Embedly",
"RankActiveLinkBot",
"iskanie",
"SafeDNSBot",
"SkypeUriPreview",
"Veoozbot",
"Slackbot",
"redditbot",
"datagnionbot",
"Google-Adwords-Instant",
"adbeat_bot",
"WhatsApp",
"contxbot",
"pinterest\\.com\\/bot",
"electricmonk",
"GarlikCrawler",
"BingPreview\\/",
"vebidoobot",
"FemtosearchBot",
"Yahoo Link Preview",
"MetaJobBot",
"DomainStatsBot",
"mindUpBot",
"Daum\\/",
"Jugendschutzprogramm-Crawler",
"Xenu Link Sleuth",
"Pcore-HTTP",
"moatbot",
"KosmioBot",
"[pP]ingdom",
"AppInsights",
"PhantomJS",
"Gowikibot",
"PiplBot",
"Discordbot",
"TelegramBot",
"Jetslide",
"newsharecounts",
"James BOT",
"Bark[rR]owler",
"TinEye",
"SocialRankIOBot",
"trendictionbot",
"Ocarinabot",
"epicbot",
"Primalbot",
"DuckDuckGo-Favicons-Bot",
"GnowitNewsbot",
"Leikibot",
"LinkArchiver",
"YaK\\/",
"PaperLiBot",
"Digg Deeper",
"dcrawl",
"Snacktory",
"AndersPinkBot",
"Fyrebot",
"EveryoneSocialBot",
"Mediatoolkitbot",
"Luminator-robots",
"ExtLinksBot",
"SurveyBot",
"NING\\/",
"okhttp",
"Nuzzel",
"omgili",
"PocketParser",
"YisouSpider",
"um-LN",
"ToutiaoSpider",
"MuckRack",
"Jamie's Spider",
"AHC\\/",
"NetcraftSurveyAgent",
"Laserlikebot",
"^Apache-HttpClient",
"AppEngine-Google",
"Jetty",
"Upflow",
"Thinklab",
"Traackr\\.com",
"Twurly",
"Mastodon",
"http_get",
"DnyzBot",
"botify",
"007ac9 Crawler",
"BehloolBot",
"BrandVerity",
"check_http",
"BDCbot",
"ZumBot",
"EZID",
"ICC-Crawler",
"ArchiveBot",
"^LCC ",
"filterdb\\.iss\\.net\\/crawler",
"BLP_bbot",
"BomboraBot",
"Buck\\/",
"Companybook-Crawler",
"Genieo",
"magpie-crawler",
"MeltwaterNews",
"Moreover",
"newspaper\\/",
"ScoutJet",
"(^| )sentry\\/",
"StorygizeBot",
"UptimeRobot",
"OutclicksBot",
"seoscanners",
"Hatena",
"Google Web Preview",
"MauiBot",
"AlphaBot",
"SBL-BOT",
"IAS crawler",
"adscanner",
"Netvibes",
"acapbot",
"Baidu-YunGuanCe",
"bitlybot",
"blogmuraBot",
"Bot\\.AraTurka\\.com",
"bot-pge\\.chlooe\\.com",
"BoxcarBot",
"BTWebClient",
"ContextAd Bot",
"Digincore bot",
"Disqus",
"Feedly",
"Fetch\\/",
"Fever",
"Flamingo_SearchEngine",
"FlipboardProxy",
"g2reader-bot",
"G2 Web Services",
"imrbot",
"K7MLWCBot",
"Kemvibot",
"Landau-Media-Spider",
"linkapediabot",
"vkShare",
"Siteimprove\\.com",
"BLEXBot\\/",
"DareBoost",
"ZuperlistBot\\/",
"Miniflux\\/",
"Feedspot",
"Diffbot\\/",
"SEOkicks",
"tracemyfile",
"Nimbostratus-Bot",
"zgrab",
"PR-CY\\.RU",
"AdsTxtCrawler",
"Datafeedwatch",
"Zabbix",
"TangibleeBot",
"google-xrawler",
"axios",
"Amazon CloudFront",
"Pulsepoint",
"CloudFlare-AlwaysOnline",
"Cloudflare-Healthchecks",
"Cloudflare-Traffic-Manager",
"CloudFlare-Prefetch",
"Cloudflare-SSLDetector",
"https:\\/\\/developers\\.cloudflare\\.com\\/security-center\\/",
"Google-Structured-Data-Testing-Tool",
"WordupInfoSearch",
"WebDataStats",
"HttpUrlConnection",
"ZoomBot",
"VelenPublicWebCrawler",
"MoodleBot",
"jpg-newsbot",
"outbrain",
"W3C_Validator",
"Validator\\.nu",
"W3C-checklink",
"W3C-mobileOK",
"W3C_I18n-Checker",
"FeedValidator",
"W3C_CSS_Validator",
"W3C_Unicorn",
"Google-PhysicalWeb",
"Blackboard",
"ICBot\\/",
"BazQux",
"Twingly",
"Rivva",
"Experibot",
"awesomecrawler",
"Dataprovider\\.com",
"GroupHigh\\/",
"theoldreader\\.com",
"AnyEvent",
"Uptimebot\\.org",
"Nmap Scripting Engine",
"2ip\\.ru",
"Clickagy",
"Caliperbot",
"MBCrawler",
"online-webceo-bot",
"B2B Bot",
"AddSearchBot",
"Google Favicon",
"HubSpot",
"Chrome-Lighthouse",
"HeadlessChrome",
"CheckMarkNetwork\\/",
"www\\.uptime\\.com",
"Streamline3Bot\\/",
"serpstatbot\\/",
"MixnodeCache\\/",
"^curl",
"SimpleScraper",
"RSSingBot",
"Jooblebot",
"fedoraplanet",
"Friendica",
"NextCloud",
"Tiny Tiny RSS",
"RegionStuttgartBot",
"Bytespider",
"Datanyze",
"Google-Site-Verification",
"TrendsmapResolver",
"tweetedtimes",
"NTENTbot",
"Gwene",
"SimplePie",
"SearchAtlas",
"Superfeedr",
"feedbot",
"UT-Dorkbot",
"Amazonbot",
"SerendeputyBot",
"Eyeotabot",
"officestorebot",
"Neticle Crawler",
"SurdotlyBot",
"LinkisBot",
"AwarioSmartBot",
"AwarioRssBot",
"RyteBot",
"FreeWebMonitoring SiteChecker",
"AspiegelBot",
"NAVER Blog Rssbot",
"zenback bot",
"SentiBot",
"Domains Project\\/",
"Pandalytics",
"VKRobot",
"bidswitchbot",
"tigerbot",
"NIXStatsbot",
"Atom Feed Robot",
"[Cc]urebot",
"PagePeeker\\/",
"Vigil\\/",
"rssbot\\/",
"startmebot\\/",
"JobboerseBot",
"seewithkids",
"NINJA bot",
"Cutbot",
"BublupBot",
"BrandONbot",
"RidderBot",
"Taboolabot",
"Dubbotbot",
"FindITAnswersbot",
"infoobot",
"Refindbot",
"BlogTraffic\\/\\d\\.\\d+ Feed-Fetcher",
"SeobilityBot",
"Cincraw",
"Dragonbot",
"VoluumDSP-content-bot",
"FreshRSS",
"BitBot",
"^PHP-Curl-Class",
"Google-Certificates-Bridge",
"centurybot",
"Viber",
"e\\.ventures Investment Crawler",
"evc-batch",
"PetalBot",
"virustotal",
"(^| )PTST\\/",
"minicrawler",
"Cookiebot",
"trovitBot",
"seostar\\.co",
"IonCrawl",
"Uptime-Kuma",
"Seekport",
"FreshpingBot",
"Feedbin",
"CriteoBot",
"Snap URL Preview Service",
"Better Uptime Bot",
"RuxitSynthetic",
"Google-Read-Aloud",
"Valve\\/Steam",
"OdklBot\\/",
"GPTBot",
"ChatGPT-User",
"OAI-SearchBot",
"YandexRenderResourcesBot\\/",
"LightspeedSystemsCrawler",
"ev-crawler\\/",
"BitSightBot\\/",
"woorankreview\\/",
"Google-Safety",
"AwarioBot",
"DataForSeoBot",
"Linespider",
"WellKnownBot",
"A Patent Crawler",
"StractBot",
"search\\.marginalia\\.nu",
"YouBot",
"Nicecrawler",
"Neevabot",
"BrightEdge Crawler",
"SiteCheckerBotCrawler",
"TombaPublicWebCrawler",
"CrawlyProjectCrawler",
"KomodiaBot",
"KStandBot",
"CISPA Webcrawler",
"MTRobot",
"hyscore\\.io",
"AlexandriaOrgBot",
"2ip bot",
"Yellowbrandprotectionbot",
"SEOlizer",
"vuhuvBot",
"INETDEX-BOT",
"Synapse",
"t3versionsBot",
"deepnoc",
"Cocolyzebot",
"hypestat",
"ReverseEngineeringBot",
"sempi\\.tech",
"Iframely",
"MetaInspector",
"node-fetch",
"l9explore",
"python-opengraph",
"OpenGraphCheck",
"developers\\.google\\.com\\/\\+\\/web\\/snippet",
"SenutoBot",
"MaCoCu",
"NewsBlur",
"inoreader",
"NetSystemsResearch",
"PageThing",
"WordPress\\/",
"PhxBot",
"ImagesiftBot",
"Expanse",
"InternetMeasurement",
"^BW\\/",
"GeedoBot",
"Audisto Crawler",
"PerplexityBot\\/",
"[cC]laude[bB]ot",
"Monsidobot",
"GroupMeBot",
"Vercelbot",
"vercel-screenshot",
"facebookcatalog\\/",
"meta-externalagent\\/",
"meta-externalfetcher\\/",
"AcademicBotRTU",
"KeybaseBot",
"Lemmy",
"CookieHubScan",
"Hydrozen\\.io",
"HTTP Banner Detection",
"SummalyBot",
"MicrosoftPreview\\/",
"GeedoProductSearch",
"TikTokSpider"
]
// Compiled form of BOT_PATTERNS. Built lazily on the first isBot() call and reused afterwards,
// so the pattern list is compiled once per process instead of once per request.
let compiledBotPatterns: RegExp[] | undefined;

/**
 * Tells whether a user-agent string matches any known bot signature in BOT_PATTERNS.
 *
 * Matching is case-sensitive because the patterns are compiled without flags.
 * NOTE(review): confirm case-sensitivity is intended, e.g. the entry "dotbot" does not match "DotBot".
 *
 * @param userAgent - User agent reported by the client. It originates from the request body,
 *                    so it may be missing or not a string at runtime.
 * @returns `true` when at least one pattern matches; `false` otherwise, including when
 *          `userAgent` is empty or not a string (there is nothing to inspect).
 */
function isBot(userAgent: string) {
    // Guard against untrusted input: calling string methods on a non-string would throw.
    if (typeof userAgent !== 'string' || userAgent.length === 0) return false;

    if (compiledBotPatterns === undefined) {
        compiledBotPatterns = BOT_PATTERNS.map((pattern) => new RegExp(pattern));
    }

    // No `g`/`y` flag is used, so the shared RegExp objects carry no lastIndex state between calls.
    return compiledBotPatterns.some((regexp) => regexp.test(userAgent));
}
/**
 * Decides whether an incoming tracking request may be logged for a project.
 *
 * The checks run in this order and stop at the first rejection:
 *  1. the caller's IP must not be listed in the project's address blacklist;
 *  2. if the project's bot-traffic option has `block` set, the user agent must not match a bot pattern;
 *  3. if the project has a non-empty domain whitelist, `website` must match one of its entries
 *     (`*` in an entry acts as a wildcard; every other character is matched literally).
 *
 * All arguments originate from the request, so they are validated at runtime even though
 * they are typed as strings.
 *
 * @param project_id - Project identifier sent by the client.
 * @param website - Website reported by the client.
 * @param ip - Caller IP address.
 * @param userAgent - User agent reported by the client.
 * @returns `true` when the request may be logged, `false` when one of the shields rejects it.
 */
export async function isAllowedToLog(project_id: string, website: string, ip: string, userAgent: string) {
    // A non-string id (e.g. `{ "$ne": null }` in a JSON body) would be handed to the query filters
    // below as an operator object instead of a value, so reject it before touching the database.
    if (typeof project_id !== 'string') return false;

    const blacklistData = await AddressBlacklistModel.find({ project_id }, { address: 1 });
    // Loose equality is kept on purpose: the stored address type is not visible from this file.
    if (blacklistData.some((blacklistedData) => blacklistedData.address == ip)) return false;

    const botOptions = await BotTrafficOptionModel.findOne({ project_id }, { block: 1 });
    if (botOptions && botOptions.block) {
        // With bot blocking enabled, a request without a usable user agent cannot be checked;
        // reject it cleanly instead of letting the match throw.
        if (typeof userAgent !== 'string') return false;
        if (isBot(userAgent)) return false;
    }

    const whitelist = await DomainWhitelistModel.findOne({ project_id }, { domains: 1 });
    const allowedDomains = whitelist ? whitelist.domains : undefined;
    // No whitelist configured (or an empty one) means every website is accepted.
    if (!allowedDomains || allowedDomains.length == 0) return true;

    // A whitelist is active: a missing or non-string website can never match an entry.
    if (typeof website !== 'string') return false;

    // NOTE(review): the generated pattern is not anchored, so an entry matches anywhere inside
    // `website` (e.g. "example.com" also matches "example.com.other.org"). Confirm whether
    // exact-host matching is intended before tightening this.
    return allowedDomains.some((allowedDomain) => {
        // Escape regex metacharacters, then turn the remaining `*` into "any characters".
        const regexpDomain = new RegExp(allowedDomain.replace(/[.+?^${}()|[\]\\]/g, '\\$&').replace(/\*/g, '.*'));
        return regexpDomain.test(website);
    });
}

View File

@@ -2,6 +2,7 @@ import { Router, json } from "express";
import { createSessionHash, getIPFromRequest } from "./utils";
import { requireEnv } from "./shared/utils/requireEnv";
import { RedisStreamService } from "./shared/services/RedisStreamService";
import { isAllowedToLog } from "./controller";
const router = Router();
@@ -14,6 +15,10 @@ router.post('/keep_alive', json(jsonOptions), async (req, res) => {
try {
const ip = getIPFromRequest(req);
const sessionHash = createSessionHash(req.body.website, ip, req.body.userAgent);
const allowed = await isAllowedToLog(req.body.pid, req.body.website, ip, req.body.userAgent);
if (!allowed) return res.sendStatus(400);
await RedisStreamService.addToStream(streamName, {
...req.body, _type: 'keep_alive', sessionHash, ip,
instant: req.body.instant + '',
@@ -32,6 +37,9 @@ router.post('/metrics/push', json(jsonOptions), async (req, res) => {
const ip = getIPFromRequest(req);
const sessionHash = createSessionHash(req.body.website, ip, req.body.userAgent);
const allowed = await isAllowedToLog(req.body.pid, req.body.website, ip, req.body.userAgent);
if (!allowed) return res.sendStatus(400);
const { type } = req.body;
if (type === 0) {

View File

@@ -14,17 +14,37 @@ const jsonOptions = { limit: '25kb', type: allowAnyType }
const streamName = requireEnv('STREAM_NAME');
import DeprecatedRouter from "./deprecated";
import { isAllowedToLog } from "./controller";
import { connectDatabase } from "./shared/services/DatabaseService";
app.use((req, res, next) => {
console.log(req.method, req.path);
next();
})
app.use('/v1', DeprecatedRouter);
app.post('/event', express.json(jsonOptions), async (req, res) => {
try {
const startTime = Date.now();
const ip = getIPFromRequest(req);
const sessionHash = createSessionHash(req.body.website, ip, req.body.userAgent);
const flowHash = createFlowSessionHash(req.body.pid, ip, req.body.userAgent);
const allowed = await isAllowedToLog(req.body.pid, req.body.website, ip, req.body.userAgent);
if (!allowed) return res.sendStatus(400);
await RedisStreamService.addToStream(streamName, {
...req.body, _type: 'event', sessionHash, ip, flowHash,
timestamp: Date.now()
});
const duration = Date.now() - startTime;
await RedisStreamService.METRICS_PRODUCER_onProcess(process.env.NODE_APP_INSTANCE, duration);
return res.sendStatus(200);
} catch (ex: any) {
return res.status(500).json({ error: ex.message });
@@ -33,10 +53,22 @@ app.post('/event', express.json(jsonOptions), async (req, res) => {
app.post('/visit', express.json(jsonOptions), async (req, res) => {
try {
const startTime = Date.now();
const ip = getIPFromRequest(req);
const sessionHash = createSessionHash(req.body.website, ip, req.body.userAgent);
const flowHash = createFlowSessionHash(req.body.pid, ip, req.body.userAgent);
const allowed = await isAllowedToLog(req.body.pid, req.body.website, ip, req.body.userAgent);
if (!allowed) return res.sendStatus(400);
await RedisStreamService.addToStream(streamName, { ...req.body, _type: 'visit', sessionHash, ip, flowHash, timestamp: Date.now() });
const duration = Date.now() - startTime;
await RedisStreamService.METRICS_PRODUCER_onProcess(process.env.NODE_APP_INSTANCE, duration);
return res.sendStatus(200);
} catch (ex: any) {
return res.status(500).json({ error: ex.message });
@@ -45,14 +77,26 @@ app.post('/visit', express.json(jsonOptions), async (req, res) => {
app.post('/keep_alive', express.json(jsonOptions), async (req, res) => {
try {
const startTime = Date.now();
const ip = getIPFromRequest(req);
const sessionHash = createSessionHash(req.body.website, ip, req.body.userAgent);
const flowHash = createFlowSessionHash(req.body.pid, ip, req.body.userAgent);
const allowed = await isAllowedToLog(req.body.pid, req.body.website, ip, req.body.userAgent);
if (!allowed) return res.sendStatus(400);
await RedisStreamService.addToStream(streamName, {
...req.body, _type: 'keep_alive', sessionHash, ip,
instant: req.body.instant + '',
flowHash, timestamp: Date.now()
});
const duration = Date.now() - startTime;
await RedisStreamService.METRICS_PRODUCER_onProcess(process.env.NODE_APP_INSTANCE, duration);
return res.sendStatus(200);
} catch (ex: any) {
return res.status(500).json({ error: ex.message });
@@ -61,6 +105,7 @@ app.post('/keep_alive', express.json(jsonOptions), async (req, res) => {
/**
 * Service entry point: connects to the database and to the Redis stream service,
 * then starts the HTTP server on the port given by the PORT environment variable.
 *
 * The connections are awaited before `listen` is called, so no request is accepted
 * until both have resolved.
 */
async function main() {
    const listenPort = requireEnv("PORT");

    // NOTE(review): unlike PORT, the connection string is read directly from process.env and is
    // passed through even when unset — confirm connectDatabase handles `undefined`.
    await connectDatabase(process.env.MONGO_CONNECTION_STRING);
    await RedisStreamService.connect();

    app.listen(listenPort, function onListening() {
        console.log(`Listening on port ${listenPort}`);
    });
}

View File

@@ -7,7 +7,6 @@ export function getIPFromRequest(req: Request) {
return ip;
}
export function createSessionHash(website: string, ip: string, userAgent: string) {
const dailySalt = new Date().toLocaleDateString('it-IT');
const sessionClean = dailySalt + website + ip + userAgent;
@@ -15,8 +14,6 @@ export function createSessionHash(website: string, ip: string, userAgent: string
return sessionHash;
}
// Track user flow from referrers to cto
export function createFlowSessionHash(project_id: string, ip: string, userAgent: string) {
const dailySalt = new Date().toLocaleDateString('it-IT');
const sessionClean = dailySalt + project_id + ip + userAgent;

View File

@@ -2,7 +2,8 @@
"compilerOptions": {
"module": "NodeNext",
"target": "ESNext",
"outDir": "dist"
"outDir": "dist",
"skipLibCheck": true
},
"include": [
"src/**/*.ts"