diff --git a/.github/workflows/import_packages.yml b/.github/workflows/import_packages.yml index c9da1970..95d5c8d0 100644 --- a/.github/workflows/import_packages.yml +++ b/.github/workflows/import_packages.yml @@ -51,7 +51,7 @@ jobs: github_token: ${{ github.token }} workflow: ".github/workflows/import_packages.yml" workflow_conclusion: success - name: sqlite_vectordb_file + name: sqlite_data path: /tmp/ name_is_regexp: true skip_unpack: false @@ -71,11 +71,11 @@ jobs: - name: 'Run import_packages.py with poetry' run: | - poetry run python scripts/import_packages.py --jsonl-dir /tmp/jsonl-files --db-path /tmp/sqlite_data/vectordb.db + poetry run python scripts/import_packages.py --jsonl-dir /tmp/jsonl-files --vec-db-path /tmp/sqlite_data/vectordb.db - name: 'Upload SQLite Vector DB File' uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4 with: - name: sqlite_vectordb_file + name: sqlite_data path: /tmp/sqlite_data/vectordb.db retention-days: 90 diff --git a/scripts/import_packages.py b/scripts/import_packages.py index f119c6b6..e96d65e1 100644 --- a/scripts/import_packages.py +++ b/scripts/import_packages.py @@ -12,9 +12,9 @@ class PackageImporter: - def __init__(self, jsonl_dir="data", db_path="./sqlite_data/vectordb.db"): - os.makedirs(os.path.dirname(db_path), exist_ok=True) - self.db_path = db_path + def __init__(self, jsonl_dir="data", vec_db_path="./sqlite_data/vectordb.db"): + os.makedirs(os.path.dirname(vec_db_path), exist_ok=True) + self.vec_db_path = vec_db_path self.json_files = [ os.path.join(jsonl_dir, "archived.jsonl"), os.path.join(jsonl_dir, "deprecated.jsonl"), @@ -25,7 +25,7 @@ def __init__(self, jsonl_dir="data", db_path="./sqlite_data/vectordb.db"): self.model_path = "./codegate_volume/models/all-minilm-L6-v2-q5_k_m.gguf" def _get_connection(self): - conn = sqlite3.connect(self.db_path) + conn = sqlite3.connect(self.vec_db_path) conn.enable_load_extension(True) sqlite_vec_sl_tmp.load(conn) conn.enable_load_extension(False) @@ -129,12 +129,12 @@ def __del__(self): help="Directory containing JSONL files. Default is 'data'.", ) parser.add_argument( - "--db-path", + "--vec-db-path", type=str, default="./sqlite_data/vectordb.db", help="Path to SQLite database file. Default is './sqlite_data/vectordb.db'.", ) args = parser.parse_args() - importer = PackageImporter(jsonl_dir=args.jsonl_dir, db_path=args.db_path) + importer = PackageImporter(jsonl_dir=args.jsonl_dir, vec_db_path=args.vec_db_path) asyncio.run(importer.run_import())