From d3254eb2c954e2f478b234d2170f666def2996df Mon Sep 17 00:00:00 2001 From: vrtmrz Date: Tue, 14 Jun 2022 07:27:10 +0900 Subject: [PATCH] Upgraded with LiveSync 0.11.0 --- .gitmodules | 3 + package-lock.json | 27 +++ package.json | 1 + readme.md | 2 +- src/index.ts | 413 ++++++++++++++++++++++++++++++++++------------ src/lib | 1 + src/pouchdb.js | 9 +- 7 files changed, 349 insertions(+), 107 deletions(-) create mode 100644 .gitmodules create mode 160000 src/lib diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ea943b6 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "src/lib"] + path = src/lib + url = https://github.com/vrtmrz/livesync-commonlib diff --git a/package-lock.json b/package-lock.json index f264a5a..38e8083 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18,6 +18,7 @@ "pouchdb-mapreduce": "^7.2.2", "pouchdb-node": "^7.2.2", "pouchdb-replication": "^7.2.2", + "transform-pouch": "^2.0.0", "xxhash-wasm": "^1.0.1" }, "devDependencies": { @@ -1251,6 +1252,11 @@ "uuid": "8.1.0" } }, + "node_modules/pouchdb-wrappers": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/pouchdb-wrappers/-/pouchdb-wrappers-5.0.0.tgz", + "integrity": "sha512-fXqsVn+rmlPtxaAIGaQP5TkiaT39OMwvMk+ScLLtHrmfXD2KBO6fe/qBl38N/rpTn0h/A058dPN4fLAHt550zA==" + }, "node_modules/pouchdb/node_modules/readable-stream": { "version": "1.1.14", "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=", @@ -1392,6 +1398,14 @@ "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", "integrity": "sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o=" }, + "node_modules/transform-pouch": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/transform-pouch/-/transform-pouch-2.0.0.tgz", + "integrity": "sha512-nDZovo0U5o0UdMNL93fMQgGjrwH9h4F/a7qqRTnF6cVA+FfgyXiJPTrSuD+LmWSO7r2deZt0P0oeCD8hkgxl5g==", + "dependencies": { + "pouchdb-wrappers": "^5.0.0" + } + }, "node_modules/ts-node": { "version": "10.5.0", "integrity": "sha512-6kEJKwVxAJ35W4akuiysfKwKmjkbYxwQMTBaAxo9KKAx/Yd26mPUyhGz3ji+EsJoAgrLqVsYHNuuYwQe22lbtw==", @@ -2542,6 +2556,11 @@ "uuid": "8.1.0" } }, + "pouchdb-wrappers": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/pouchdb-wrappers/-/pouchdb-wrappers-5.0.0.tgz", + "integrity": "sha512-fXqsVn+rmlPtxaAIGaQP5TkiaT39OMwvMk+ScLLtHrmfXD2KBO6fe/qBl38N/rpTn0h/A058dPN4fLAHt550zA==" + }, "prr": { "version": "1.0.1", "integrity": "sha1-0/wRS6BplaRexok/SEzrHXj19HY=" @@ -2642,6 +2661,14 @@ "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", "integrity": "sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o=" }, + "transform-pouch": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/transform-pouch/-/transform-pouch-2.0.0.tgz", + "integrity": "sha512-nDZovo0U5o0UdMNL93fMQgGjrwH9h4F/a7qqRTnF6cVA+FfgyXiJPTrSuD+LmWSO7r2deZt0P0oeCD8hkgxl5g==", + "requires": { + "pouchdb-wrappers": "^5.0.0" + } + }, "ts-node": { "version": "10.5.0", "integrity": "sha512-6kEJKwVxAJ35W4akuiysfKwKmjkbYxwQMTBaAxo9KKAx/Yd26mPUyhGz3ji+EsJoAgrLqVsYHNuuYwQe22lbtw==", diff --git a/package.json b/package.json index 42c2581..f8d9028 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "pouchdb-mapreduce": "^7.2.2", "pouchdb-node": "^7.2.2", "pouchdb-replication": "^7.2.2", + "transform-pouch": "^2.0.0", "xxhash-wasm": "^1.0.1" }, "devDependencies": { diff --git a/readme.md b/readme.md index 3660740..6e8ee1a 100644 --- a/readme.md +++ b/readme.md @@ -7,7 +7,7 @@ Notice: **We're on the bleeding edge.** Please make sure to back your vault up! 
## How to run

```sh
-git clone https://github.com/vrtmrz/filesystem-livesync
+git clone --recursive https://github.com/vrtmrz/filesystem-livesync
cp dat/config.sample.json dat/config.json
# Setting up configuration
vi dat/config.json
diff --git a/src/index.ts b/src/index.ts
index c4956bf..96a04c6 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -10,9 +10,10 @@ import { Logger } from "./logger.js";

//@ts-ignore
import { PouchDB as PouchDB_src } from "./pouchdb.js";
-import { decrypt, encrypt } from "./e2ee.js";
import { configFile, connectConfig, eachConf, Entry, EntryLeaf, LoadedEntry, LOG_LEVEL, MAX_DOC_SIZE, MAX_DOC_SIZE_BIN, NewEntry, PlainEntry, TransferEntry } from "./types.js";
import { addKnownFile, addTouchedFile, calcDateDiff, DATEDIFF_EVEN, DATEDIFF_NEWER_A, DATEDIFF_OLDER_A, isKnownFile, isPlainText, isTouchedFile, path2unix } from "./util.js";
+import { enableEncryption, runWithLock, shouldSplitAsPlainText, splitPieces2 } from "./lib/src/utils.js";
+import { EntryDoc } from "./lib/src/types.js";

const xxhash = require("xxhash-wasm");

@@ -80,17 +81,205 @@ function triggerProcessor(procs: string) {
        runEngine();
    }, 500);
}
+class LRUCache {
+    cache = new Map<string, string>();
+    revCache = new Map<string, string>();
+    maxCache = 100;
+    constructor() {}
+    get(key: string) {
+        const v = this.cache.get(key);

-let hashCache: {
-    [key: string]: string;
-} = {};
-let hashCacheRev: {
-    [key: string]: string;
-} = {};
+        if (v) {
+            // update the key to recently used.
+            this.cache.delete(key);
+            this.revCache.delete(v);
+            this.cache.set(key, v);
+            this.revCache.set(v, key);
+        }
+        return v;
+    }
+    revGet(value: string) {
+        const key = this.revCache.get(value);
+        if (key) {
+            // update the key to recently used.
+            this.cache.delete(key);
+            this.revCache.delete(value);
+            this.cache.set(key, value);
+            this.revCache.set(value, key);
+        }
+        return key;
+    }
+    set(key: string, value: string) {
+        this.cache.set(key, value);
+        this.revCache.set(value, key);
+        if (this.cache.size > this.maxCache) {
+            for (const kv of this.cache) {
+                this.revCache.delete(kv[1]);
+                this.cache.delete(kv[0]);
+                if (this.cache.size <= this.maxCache) break;
+            }
+        }
+    }
+}
+
+const hashCaches = new LRUCache();
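+// hashCaches replaces the old unbounded hashCache / hashCacheRev objects:
+// it keeps at most maxCache (100) piece -> leaf-id mappings, with the reverse
+// map maintained in lockstep, so a long-running process no longer grows without bound.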
"plain" : "newnote", +// }; +// // Here for upsert logic, +// try { +// const old = await database.get(newDoc._id); +// if (!old.type || old.type == "notes" || old.type == "newnote" || old.type == "plain") { +// // simple use rev for new doc +// newDoc._rev = old._rev; +// } +// } catch (ex: any) { +// if (ex.status && ex.status == 404) { +// // NO OP/ +// } else { +// throw ex; +// } +// } +// const ret = await database.put(newDoc, { force: true }); +// Logger(`note saved:${newDoc._id}:${ret.rev}`); +// return ret; +// } else { +// Logger(`note coud not saved:${note._id}`); +// } +// } -// putDBEntry:COPIED FROM obsidian-livesync async function putDBEntry(note: LoadedEntry, passphrase: string, database: PouchDB.Database) { - let leftData = note.data; + // let leftData = note.data; const savenNotes = []; let processed = 0; let made = 0; @@ -99,64 +288,40 @@ async function putDBEntry(note: LoadedEntry, passphrase: string, database: Pouch let plainSplit = false; let cacheUsed = 0; const userpasswordHash = h32Raw(new TextEncoder().encode(passphrase)); - if (isPlainText(note._id)) { + if (shouldSplitAsPlainText(note._id)) { pieceSize = MAX_DOC_SIZE; plainSplit = true; } + const newLeafs: EntryLeaf[] = []; - do { - // To keep low bandwith and database size, - // Dedup pieces on database. - // from 0.1.10, for best performance. we use markdown delimiters - // 1. \n[^\n]{longLineThreshold}[^\n]*\n -> long sentence shuld break. - // 2. \n\n shold break - // 3. \r\n\r\n should break - // 4. \n# should break. - let cPieceSize = pieceSize; - if (plainSplit) { - let minimumChunkSize = 20; //default - if (minimumChunkSize < 10) minimumChunkSize = 10; - let longLineThreshold = 250; //default - if (longLineThreshold < 100) longLineThreshold = 100; - cPieceSize = 0; - // lookup for next splittion . - // we're standing on "\n" - do { - const n1 = leftData.indexOf("\n", cPieceSize + 1); - const n2 = leftData.indexOf("\n\n", cPieceSize + 1); - const n3 = leftData.indexOf("\r\n\r\n", cPieceSize + 1); - const n4 = leftData.indexOf("\n#", cPieceSize + 1); - if (n1 == -1 && n2 == -1 && n3 == -1 && n4 == -1) { - cPieceSize = MAX_DOC_SIZE; - break; - } + // To keep low bandwith and database size, + // Dedup pieces on database. + // from 0.1.10, for best performance. we use markdown delimiters + // 1. \n[^\n]{longLineThreshold}[^\n]*\n -> long sentence shuld break. + // 2. \n\n shold break + // 3. \r\n\r\n should break + // 4. \n# should break. + let minimumChunkSize = 20; //default + if (minimumChunkSize < 10) minimumChunkSize = 10; + let longLineThreshold = 250; //default + if (longLineThreshold < 100) longLineThreshold = 100; - if (n1 > longLineThreshold) { - // long sentence is an established piece - cPieceSize = n1; - } else { - // cPieceSize = Math.min.apply([n2, n3, n4].filter((e) => e > 1)); - // ^ heavy. - if (n1 > 0 && cPieceSize < n1) cPieceSize = n1; - if (n2 > 0 && cPieceSize < n2) cPieceSize = n2 + 1; - if (n3 > 0 && cPieceSize < n3) cPieceSize = n3 + 3; - // Choose shorter, empty line and \n# - if (n4 > 0 && cPieceSize > n4) cPieceSize = n4 + 0; - cPieceSize++; - } - } while (cPieceSize < minimumChunkSize); - } + //benchmarhk - // piece size determined. - const piece = leftData.substring(0, cPieceSize); - leftData = leftData.substring(cPieceSize); + const pieces = splitPieces2(note.data, pieceSize, plainSplit, minimumChunkSize, longLineThreshold); + for (const piece of pieces()) { processed++; let leafid = ""; // Get hash of piece. 
        let hashedPiece = "";
-        if (typeof hashCache[piece] !== "undefined") {
+        let hashQ = 0; // if the hash ever collides, this suffix counts up.
+        let tryNextHash = false;
+        let needMake = true;
+        const cache = hashCaches.get(piece);
+        if (cache) {
            hashedPiece = "";
-            leafid = hashCache[piece];
+            leafid = cache;
+            needMake = false;
            skiped++;
            cacheUsed++;
        } else {
@@ -167,42 +332,76 @@ async function putDBEntry(note: LoadedEntry, passphrase: string, database: Pouch
                hashedPiece = h32(piece);
            }
            leafid = "h:" + hashedPiece;
+            do {
+                let nleafid = leafid;
+                try {
+                    nleafid = `${leafid}${hashQ}`;
+                    const pieceData = await database.get(nleafid);
+                    if (pieceData.type == "leaf" && pieceData.data == piece) {
+                        leafid = nleafid;
+                        needMake = false;
+                        tryNextHash = false;
+                        hashCaches.set(piece, leafid);
+                    } else if (pieceData.type == "leaf") {
+                        Logger("hash:collision!!");
+                        hashQ++;
+                        tryNextHash = true;
+                    } else {
+                        leafid = nleafid;
+                        tryNextHash = false;
+                    }
+                } catch (ex: any) {
+                    if (ex.status && ex.status == 404) {
+                        // not found: this id is free to use.
+                        leafid = nleafid;
+                        needMake = true;
+                        tryNextHash = false;
+                    } else {
+                        needMake = false;
+                        tryNextHash = false;
+                        throw ex;
+                    }
+                }
+            } while (tryNextHash);
+            if (needMake) {
+                // have to make a new leaf
+                const savePiece = piece;
-            //have to make
-            const savePiece = passphrase != "" ? await encrypt(piece, passphrase) : piece;
-
-            const d: EntryLeaf = {
-                _id: leafid,
-                data: savePiece,
-                type: "leaf",
-            };
-            newLeafs.push(d);
-            hashCache[piece] = leafid;
-            hashCacheRev[leafid] = piece;
-            made++;
+                const d: EntryLeaf = {
+                    _id: leafid,
+                    data: savePiece,
+                    type: "leaf",
+                };
+                newLeafs.push(d);
+                hashCaches.set(piece, leafid);
+                made++;
+            } else {
+                skiped++;
+            }
        }
        savenNotes.push(leafid);
-    } while (leftData != "");
+    }
    let saved = true;
    if (newLeafs.length > 0) {
        try {
            const result = await database.bulkDocs(newLeafs);
+
            for (const item of result) {
-                if ((item as any).ok) {
-                    Logger(`save ok:id:${item.id} rev:${item.rev}`, LOG_LEVEL.VERBOSE);
-                } else {
+                if (!(item as any).ok) {
                    if ((item as any).status && (item as any).status == 409) {
                        // conflicted, but it would be ok in childrens.
                    } else {
-                        Logger(`save failed:id:${item.id} rev:${item.rev}`, LOG_LEVEL.NOTICE);
+                        Logger(`Save failed:id:${item.id} rev:${item.rev}`, LOG_LEVEL.NOTICE);
                        Logger(item);
-                        // this.disposeHashCache();
                        saved = false;
                    }
                }
            }
+            if (saved) {
+                Logger(`Chunk saved:${newLeafs.length} chunks`);
+            }
        } catch (ex) {
-            Logger("ERROR ON SAVING LEAVES:", LOG_LEVEL.NOTICE);
+            Logger("Chunk save failed:", LOG_LEVEL.NOTICE);
            Logger(ex, LOG_LEVEL.NOTICE);
            saved = false;
        }
@@ -219,24 +418,27 @@ async function putDBEntry(note: LoadedEntry, passphrase: string, database: Pouch
            type: plainSplit ? 
"plain" : "newnote", }; // Here for upsert logic, - try { - const old = await database.get(newDoc._id); - if (!old.type || old.type == "notes" || old.type == "newnote" || old.type == "plain") { - // simple use rev for new doc - newDoc._rev = old._rev; + return await runWithLock("file:" + newDoc._id, false, async () => { + try { + const old = await database.get(newDoc._id); + if (!old.type || old.type == "notes" || old.type == "newnote" || old.type == "plain") { + // simple use rev for new doc + newDoc._rev = old._rev; + } + } catch (ex) { + if (ex.status && ex.status == 404) { + // NO OP/ + } else { + throw ex; + } } - } catch (ex: any) { - if (ex.status && ex.status == 404) { - // NO OP/ - } else { - throw ex; - } - } - const ret = await database.put(newDoc, { force: true }); - Logger(`note saved:${newDoc._id}:${ret.rev}`); - return ret; + const r = await database.put(newDoc, { force: true }); + Logger(`note saved:${newDoc._id}:${r.rev}`); + return r; + }); } else { Logger(`note coud not saved:${note._id}`); + return false; } } @@ -252,6 +454,9 @@ async function eachProc(syncKey: string, config: eachConf) { const processor = config.local?.processor ?? ""; const remote = new PouchDB(serverURI, { auth: serverAuth }); + if (serverAuth.passphrase != "") { + enableEncryption(remote as PouchDB.Database, serverAuth.passphrase); + } async function sanityCheck() { let mr = await remote.info(); @@ -501,7 +706,7 @@ async function eachProc(syncKey: string, config: eachConf) { addTouchedFile(pathSrc, mtime); await pushFile(pathSrc, stat); - // this.watchVaultChange(path, stat); + // watchVaultChange(path, stat); }); log("Start Database watching"); openConnection(conf, config.auto_reconnect ?? false); @@ -563,21 +768,21 @@ async function exportDoc(sendDoc: TransferEntry, docName: string, passphrase: st delete (v as any)?._rev; } - let decrypted_children = - passphrase == "" - ? children - : ( - await Promise.allSettled( - children.map(async (e: any) => { - e.data = await decrypt(e.data, passphrase); - return e; - }) - ) - ).map((e) => (e.status == "fulfilled" ? e.value : null)); + // let decrypted_children = + // passphrase == "" + // ? children + // : ( + // await Promise.allSettled( + // children.map(async (e: any) => { + // e.data = await decrypt(e.data, passphrase); + // return e; + // }) + // ) + // ).map((e) => (e.status == "fulfilled" ? e.value : null)); const dirName = path.dirname(writePath); log(`doc:${docName}: Exporting to ${writePath}`); await fs.mkdir(dirName, { recursive: true }); - const dt_plain = decrypted_children.map((e) => e.data).join(""); + const dt_plain = children.map((e: any) => e.data).join(""); const mtime = sendDoc.mtime ?? 
new Date().getTime(); addTouchedFile(writePath, mtime); @@ -651,4 +856,4 @@ async function main() { } } -main().then((_) => { }); +main().then((_) => {}); diff --git a/src/lib b/src/lib new file mode 160000 index 0000000..ae98905 --- /dev/null +++ b/src/lib @@ -0,0 +1 @@ +Subproject commit ae989051b5fd55db3e9a6b9facf45ee72c0d48c0 diff --git a/src/pouchdb.js b/src/pouchdb.js index db4f6e1..11eaae5 100644 --- a/src/pouchdb.js +++ b/src/pouchdb.js @@ -1,5 +1,10 @@ - -const pouchdb_src = require("pouchdb-core").plugin(require("pouchdb-find")).plugin(require("pouchdb-adapter-leveldb")).plugin(require("pouchdb-adapter-http")).plugin(require("pouchdb-mapreduce")).plugin(require("pouchdb-replication")); +const pouchdb_src = require("pouchdb-core") + .plugin(require("pouchdb-find")) + .plugin(require("pouchdb-adapter-leveldb")) + .plugin(require("pouchdb-adapter-http")) + .plugin(require("pouchdb-mapreduce")) + .plugin(require("pouchdb-replication")) + .plugin(require("transform-pouch")); const PouchDB = pouchdb_src; /** * @type {PouchDB.Static<>}
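
Note on the encryption change in this patch: the old per-piece `encrypt`/`decrypt` calls are gone; instead `transform-pouch` is loaded as a PouchDB plugin (src/pouchdb.js above) and `enableEncryption()` from the `src/lib` submodule is applied to the remote database. The sketch below illustrates the idea using transform-pouch's documented `transform({ incoming, outgoing })` hook; the helper declarations and the leaf-only filter are assumptions for illustration, not the exact code in livesync-commonlib.

```ts
// Illustrative sketch only; the real enableEncryption() lives in the
// livesync-commonlib submodule. The e2ee helpers below are hypothetical.
declare function encryptE2EE(data: string, passphrase: string): Promise<string>;
declare function decryptE2EE(data: string, passphrase: string): Promise<string>;

function enableEncryptionSketch(db: PouchDB.Database, passphrase: string) {
    // transform-pouch hooks every document passing through `db`:
    // "incoming" on the way into the database (puts, replication pushes),
    // "outgoing" on the way out (gets, replication pulls). Encrypting here
    // means the remote only ever stores ciphertext, while putDBEntry() and
    // exportDoc() keep working with plaintext chunks.
    (db as any).transform({
        incoming: async (doc: any) => {
            if (doc?.type == "leaf" && typeof doc.data == "string") {
                doc.data = await encryptE2EE(doc.data, passphrase);
            }
            return doc;
        },
        outgoing: async (doc: any) => {
            if (doc?.type == "leaf" && typeof doc.data == "string") {
                doc.data = await decryptE2EE(doc.data, passphrase);
            }
            return doc;
        },
    });
}
```

This is also why `exportDoc` no longer decrypts children by hand: documents read through the transformed remote already arrive decrypted.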