---
title: Import data from Parquet files
description: Leverage Parquet files in Memgraph operations. Our detailed guide simplifies the process for an enhanced graph computing journey.
---

import { Callout } from 'nextra/components'
import { Steps } from 'nextra/components'
import { Tabs } from 'nextra/components'
import {CommunityLinks} from '/components/social-card/CommunityLinks'

# Import data from Parquet files

Data from Parquet files can be imported using the [`LOAD PARQUET` Cypher
clause](#load-parquet-cypher-clause), either from the local disk or from
S3-compatible storage.
## `LOAD PARQUET` Cypher clause

The `LOAD PARQUET` clause uses a background thread that reads the Parquet file
in column batches, assembles them into row batches of 64K rows, and places those
batches into a queue. The main thread then pulls each batch from the queue and
processes it row by row. For every row, it binds the parsed values to the
specified variables and either populates the database (if it is empty) or
appends the new rows to an existing dataset.

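The batching pipeline described above follows a classic producer–consumer pattern. It can be sketched in Python as a simplified illustration of the idea (not Memgraph's actual implementation; file reading and row processing are stubbed out):

```python
import queue
import threading

BATCH_SIZE = 65536  # 64K rows per batch, as in LOAD PARQUET

def producer(rows, out_queue):
    """Background thread: group rows into fixed-size batches and enqueue them."""
    for start in range(0, len(rows), BATCH_SIZE):
        out_queue.put(rows[start:start + BATCH_SIZE])
    out_queue.put(None)  # sentinel: no more batches

def consume(rows):
    """Main thread: pull batches off the queue and process them row by row."""
    batches = queue.Queue(maxsize=4)  # bounded queue limits memory use
    threading.Thread(target=producer, args=(rows, batches)).start()
    processed = 0
    while (batch := batches.get()) is not None:
        for _row in batch:
            processed += 1  # here Memgraph would bind values and create data
    return processed
```

The bounded queue lets the reader stay ahead of the main thread without loading the whole file into memory at once.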
### `LOAD PARQUET` clause syntax

The syntax of the `LOAD PARQUET` clause is:

```cypher
LOAD PARQUET FROM <parquet-location> ( WITH CONFIG configs=configMap )? AS <variable-name>
```

- `<parquet-location>` is a string that specifies where the Parquet file is
  located.<br/>
  If the path **does not** start with `s3://`, it is treated as a local file
  path. If it **does** start with `s3://`, Memgraph retrieves the file from the
  S3-compatible storage using the provided URI. There are no restrictions on the
  file’s location within your local file system, as long as the path is valid
  and the file exists. If you are using Docker to run Memgraph, you will need to
  [copy the files from your local directory into the
  Docker](/getting-started/first-steps-with-docker#copy-files-from-and-to-a-docker-container)
  container where Memgraph can access them. <br/>

- `<configs>` represents an optional configuration map through which you can
  specify the following options:
  - `aws_region`: The region in which your S3 service is located.
  - `aws_access_key`: The access key used to connect to the S3 service.
  - `aws_secret_key`: The secret key used to connect to the S3 service.
  - `aws_endpoint_url`: Optional. Can be used to set the URL of the
    S3-compatible storage.
- `<variable-name>` is a symbolic name for the variable to which the contents
  of each parsed row will be bound, enabling access to the row contents later
  in the query. The variable doesn't have to be used in any subsequent clause.

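Putting the pieces together, a query reading from an S3 bucket with inline credentials might look like the sketch below (the bucket path and credential values are placeholders, not real endpoints):

```cypher
LOAD PARQUET FROM "s3://my-bucket/people.parquet"
WITH CONFIG configs={aws_region: "eu-west-1", aws_access_key: "<access-key>", aws_secret_key: "<secret-key>"}
AS row
CREATE (p:Person) SET p += row;
```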
### `LOAD PARQUET` clause specificities

When using the `LOAD PARQUET` clause, please keep in mind:

- **Type handling:** <br/>
  The parser reads each value using its native Parquet type, so you should
  receive the same data type inside Memgraph. The following types are supported:
  **BOOL, INT8, INT16, INT32, INT64, UINT8, UINT16, UINT32, UINT64, HALF_FLOAT,
  FLOAT, DOUBLE, STRING, LARGE_STRING, STRING_VIEW, DATE32, DATE64, TIME32,
  TIME64, TIMESTAMP, DURATION, DECIMAL128, DECIMAL256, BINARY, LARGE_BINARY,
  FIXED_SIZE_BINARY, LIST, MAP.** <br/>
  Any unsupported types are automatically stored as strings. Note that
  `UINT64` values are cast to `INT64` because Memgraph does not support
  unsigned 64-bit integers, and the Cypher standard only defines 64-bit signed
  integers.

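As an illustration of why that cast matters: an unsigned 64-bit value above 2^63 - 1 has no signed equivalent of the same width, so reinterpreting its bits as a signed integer wraps it negative. The Python sketch below demonstrates the bit reinterpretation only; Memgraph's exact handling of out-of-range values is not specified here.

```python
import struct

def uint64_to_int64(value: int) -> int:
    """Reinterpret the bits of a 64-bit unsigned integer as a signed one."""
    return struct.unpack("<q", struct.pack("<Q", value))[0]

# Values up to 2**63 - 1 fit in a signed 64-bit integer unchanged.
print(uint64_to_int64(42))         # 42
# Larger values wrap around to negative when reinterpreted.
print(uint64_to_int64(2**64 - 1))  # -1
```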
- **Authentication parameters:** <br/>
  Parameters for accessing S3-compatible storage (`aws_region`,
  `aws_access_key`, `aws_secret_key`, and `aws_endpoint_url`) can be provided in
  three ways:

  1. Directly in the `LOAD PARQUET` query using the `WITH CONFIG` clause.
  2. Through environment variables: `AWS_REGION`, `AWS_ACCESS_KEY`,
     `AWS_SECRET_KEY`, and `AWS_ENDPOINT_URL`.
  3. Through run-time database settings, using `SET DATABASE SETTING <key> TO
     <value>;`. The corresponding setting keys are: `aws.access_key`,
     `aws.region`, `aws.secret_key`, and `aws.endpoint_url`.

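For instance, the run-time settings can be configured before running the import (the values below are placeholders):

```cypher
SET DATABASE SETTING "aws.region" TO "eu-west-1";
SET DATABASE SETTING "aws.access_key" TO "<access-key>";
SET DATABASE SETTING "aws.secret_key" TO "<secret-key>";
```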
- **The `LOAD PARQUET` clause is not a standalone clause**, meaning a valid query
  must contain at least one more clause, for example:

  ```cypher
  LOAD PARQUET FROM "/people.parquet" AS row
  CREATE (p:Person) SET p += row;
  ```

  In this regard, the following query will throw an exception:

  ```cypher
  LOAD PARQUET FROM "/file.parquet" AS row;
  ```

  **Adding a `MATCH` or `MERGE` clause before `LOAD PARQUET`** allows you to
  match certain entities in the graph before running `LOAD PARQUET`, optimizing
  the process as matched entities do not need to be searched for every row in
  the Parquet file.

  However, a `MATCH` or `MERGE` clause can precede the `LOAD PARQUET` clause
  only if it returns a single row. Returning multiple rows before calling the
  `LOAD PARQUET` clause will cause a Memgraph runtime error.

- **The `LOAD PARQUET` clause can be used at most once per query**, so queries
  like the one below will throw an exception:

  ```cypher
  LOAD PARQUET FROM "/x.parquet" AS x
  LOAD PARQUET FROM "/y.parquet" AS y
  CREATE (n:A {p1 : x, p2 : y});
  ```

### Increase import speed

You can significantly increase data-import speed when using the `LOAD PARQUET`
clause by taking advantage of indexing, batching, and analytical storage mode.

#### 1. Create indexes

`LOAD PARQUET` can establish relationships much faster if
[indexes](/fundamentals/indexes) on nodes or node properties are created *after*
loading the associated nodes:

```cypher
CREATE INDEX ON :Node(id);
```

If `LOAD PARQUET` is **merging** existing data rather than creating new records,
then create the indexes **before** running the import.

#### 2. Use periodic commit

The `USING PERIODIC COMMIT <BATCH_SIZE>` construct optimizes memory allocation
and can improve import speed by **25–35%**, based on our benchmarks.

```cypher
USING PERIODIC COMMIT 1024
LOAD PARQUET FROM "/x.parquet" AS x
CREATE (n:A {p1: x.p1, p2: x.p2});
```

#### 3. Switch to analytical storage mode

Import performance can also improve by switching Memgraph to [analytical storage
mode](/fundamentals/storage-memory-usage#storage-modes), which relaxes ACID
guarantees except for manually created snapshots. Once the import is complete,
you can switch back to transactional mode to restore full ACID guarantees.

Switch storage modes within a session:

```cypher
STORAGE MODE IN_MEMORY_{TRANSACTIONAL|ANALYTICAL};
```

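A typical import session then looks like the following sketch: switch to analytical mode, run the imports, and switch back (the file path is a placeholder):

```cypher
STORAGE MODE IN_MEMORY_ANALYTICAL;
LOAD PARQUET FROM "/people.parquet" AS row
CREATE (p:Person) SET p += row;
STORAGE MODE IN_MEMORY_TRANSACTIONAL;
```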
#### 4. Run imports in parallel

When using `IN_MEMORY_ANALYTICAL` mode and storing nodes and relationships in
separate Parquet files, you can run multiple concurrent `LOAD PARQUET` queries
to accelerate the import even further.

For best performance:

1. Split node and relationship data into smaller files.
2. Run all `LOAD PARQUET` statements that **create nodes** first.
3. Then run all `LOAD PARQUET` statements that **create relationships**.
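The ordering above can be sketched with Python's `concurrent.futures`. This is an illustration only: `run_query` is a hypothetical helper that would send a query to Memgraph through a client driver, and the file paths and column names (`first`, `second`) are assumptions.

```python
from concurrent.futures import ThreadPoolExecutor

# Hypothetical file paths: nodes and relationships split into smaller files.
NODE_FILES = ["/people_1.parquet", "/people_2.parquet"]
REL_FILES = ["/friends_1.parquet", "/friends_2.parquet"]

def run_query(query: str) -> str:
    """Placeholder: a real helper would execute the query via a Memgraph driver."""
    return query

def parallel_import(paths, template):
    """Run one LOAD PARQUET query per file concurrently and collect the results."""
    queries = [template.format(path=p) for p in paths]
    with ThreadPoolExecutor(max_workers=4) as pool:
        return list(pool.map(run_query, queries))

# Step 2: all node-creating imports first...
nodes_done = parallel_import(
    NODE_FILES,
    'LOAD PARQUET FROM "{path}" AS row CREATE (n:Person) SET n += row;')
# Step 3: ...then all relationship-creating imports, so every MATCH finds its endpoints.
rels_done = parallel_import(
    REL_FILES,
    'LOAD PARQUET FROM "{path}" AS row '
    'MATCH (a:Person {{id: row.first}}), (b:Person {{id: row.second}}) '
    'CREATE (a)-[:IS_FRIENDS_WITH]->(b);')
```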

### Usage example

In this example, we will import multiple Parquet files with distinct graph
objects. The data is split across four files; each file contains nodes of a
single label or relationships of a single type.

<Steps>

  {<h3 className="custom-header">Parquet files</h3>}

- [`people_nodes.parquet`](s3://download.memgraph.com/asset/docs/people_nodes.parquet) is used to create nodes labeled `:Person`.<br/> The file contains the following data:
  ```parquet
  id,name,age,city
  100,Daniel,30,London
  101,Alex,15,Paris
  102,Sarah,17,London
  103,Mia,25,Zagreb
  104,Lucy,21,Paris
  ```
- [`restaurants_nodes.parquet`](s3://download.memgraph.com/asset/docs/restaurants_nodes.parquet) is used to create nodes labeled `:Restaurant`.<br/> The file contains the following data:
  ```parquet
  id,name,menu
  200,Mc Donalds,Fries;BigMac;McChicken;Apple Pie
  201,KFC,Fried Chicken;Fries;Chicken Bucket
  202,Subway,Ham Sandwich;Turkey Sandwich;Foot-long
  203,Dominos,Pepperoni Pizza;Double Dish Pizza;Cheese filled Crust
  ```
- [`people_relationships.parquet`](s3://download.memgraph.com/asset/docs/people_relationships.parquet) is used to connect people with the `:IS_FRIENDS_WITH` relationship.<br/> The file contains the following data:
  ```parquet
  first_person,second_person,met_in
  100,102,2014
  103,101,2021
  102,103,2005
  101,104,2005
  104,100,2018
  101,102,2017
  100,103,2001
  ```
- [`restaurants_relationships.parquet`](s3://download.memgraph.com/asset/docs/restaurants_relationships.parquet) is used to connect people with restaurants using the `:ATE_AT` relationship.<br/> The file contains the following data:
  ```parquet
  PERSON_ID,REST_ID,liked
  100,200,true
  103,201,false
  104,200,true
  101,202,false
  101,203,false
  101,200,true
  102,201,true
  ```

  {<h3 className="custom-header">Import nodes</h3>}

  Each row will be parsed as a map, and the fields can be accessed using the
  property lookup syntax (e.g. `id: row.id`). Files can be imported directly
  from S3 or downloaded first and then accessed from the local disk.

  The following query will load the file row by row and create a new node for
  each row, with properties based on the parsed row values:

  ```cypher
  LOAD PARQUET FROM "s3://download.memgraph.com/asset/docs/people_nodes.parquet" AS row
  CREATE (n:Person {id: row.id, name: row.name, age: row.age, city: row.city});
  ```

  In the same manner, the following query will create a new node for each restaurant:

  ```cypher
  LOAD PARQUET FROM "s3://download.memgraph.com/asset/docs/restaurants_nodes.parquet" AS row
  CREATE (n:Restaurant {id: row.id, name: row.name, menu: row.menu});
  ```

  {<h3 className="custom-header">Create indexes</h3>}

  Creating an [index](/fundamentals/indexes) on a property used to connect nodes
  with relationships, in this case the `id` property of the `:Person` nodes,
  will speed up the import of relationships, especially with large datasets:

  ```cypher
  CREATE INDEX ON :Person(id);
  ```

  {<h3 className="custom-header">Import relationships</h3>}

  The following query will create relationships between the people nodes:

  ```cypher
  LOAD PARQUET FROM "s3://download.memgraph.com/asset/docs/people_relationships.parquet" AS row
  MATCH (p1:Person {id: row.first_person})
  MATCH (p2:Person {id: row.second_person})
  CREATE (p1)-[f:IS_FRIENDS_WITH]->(p2)
  SET f.met_in = row.met_in;
  ```

  The following query will create relationships between people and the restaurants where they ate:

  ```cypher
  LOAD PARQUET FROM "s3://download.memgraph.com/asset/docs/restaurants_relationships.parquet" AS row
  MATCH (p1:Person {id: row.PERSON_ID})
  MATCH (re:Restaurant {id: row.REST_ID})
  CREATE (p1)-[ate:ATE_AT]->(re)
  SET ate.liked = toBoolean(row.liked);
  ```

  {<h3 className="custom-header">Final result</h3>}

  Run the following query to see how the imported data looks as a graph:

  ```cypher
  MATCH p=()-[]-() RETURN p;
  ```

  

</Steps>

<CommunityLinks/>