Skip to content

Commit 8371e1e

Browse files
committed
Add expand option to Result.to_df.
This option (when `True`) will make the driver flatten nodes, relationships, lists, and dicts into multiple columns of the DataFrame.
1 parent 8c902f0 commit 8371e1e

File tree

7 files changed

+609
-36
lines changed

7 files changed

+609
-36
lines changed

docs/source/api.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ Path :class:`neo4j.graph.Path`
987987
Node
988988
====
989989

990-
.. autoclass:: neo4j.graph.Node()
990+
.. autoclass:: neo4j.graph.Node
991991

992992
.. describe:: node == other
993993

@@ -1022,6 +1022,8 @@ Node
10221022

10231023
.. autoattribute:: id
10241024

1025+
.. autoattribute:: element_id
1026+
10251027
.. autoattribute:: labels
10261028

10271029
.. automethod:: get
@@ -1036,7 +1038,7 @@ Node
10361038
Relationship
10371039
============
10381040

1039-
.. autoclass:: neo4j.graph.Relationship()
1041+
.. autoclass:: neo4j.graph.Relationship
10401042

10411043
.. describe:: relationship == other
10421044

@@ -1076,6 +1078,8 @@ Relationship
10761078

10771079
.. autoattribute:: id
10781080

1081+
.. autoattribute:: element_id
1082+
10791083
.. autoattribute:: nodes
10801084

10811085
.. autoattribute:: start_node
@@ -1097,7 +1101,7 @@ Relationship
10971101
Path
10981102
====
10991103

1100-
.. autoclass:: neo4j.graph.Path()
1104+
.. autoclass:: neo4j.graph.Path
11011105

11021106
.. describe:: path == other
11031107

neo4j/_async/work/result.py

Lines changed: 106 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
from warnings import warn
2121

2222
from ..._async_compat.util import AsyncUtil
23-
from ...data import DataDehydrator
23+
from ...data import (
24+
DataDehydrator,
25+
RecordTableRowExporter,
26+
)
2427
from ...exceptions import (
2528
ResultConsumedError,
2629
ResultNotSingleError,
@@ -524,14 +527,83 @@ async def data(self, *keys):
524527

525528
@experimental("pandas support is experimental and might be changed or "
526529
"removed in future versions")
527-
async def to_df(self):
528-
"""Convert (the rest of) the result to a pandas DataFrame.
530+
async def to_df(self, expand=False):
531+
r"""Convert (the rest of) the result to a pandas DataFrame.
529532
530533
This method is only available if the `pandas` library is installed.
531534
532-
``tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m").to_df()``, for
533-
instance will return a DataFrame with two columns: ``n`` and ``m`` and
534-
10 rows.
535+
::
536+
537+
res = await tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m")
538+
df = await res.to_df()
539+
540+
for instance will return a DataFrame with two columns: ``n`` and ``m``
541+
and 10 rows.
542+
543+
:param expand: if :const:`True`, some structures in the result will be
544+
recursively expanded (flattened out into multiple columns) like so
545+
(everything inside ``<...>`` is a placeholder):
546+
547+
* :class:`.Node` objects under any variable ``<n>`` will be
548+
expanded into columns (the recursion stops here)
549+
550+
* ``<n>().prop.<property_name>`` (any) for each property of the
551+
node.
552+
* ``<n>().element_id`` (str) the node's element id.
553+
See :attr:`.Node.element_id`.
554+
* ``<n>().labels`` (frozenset of str) the node's labels.
555+
See :attr:`.Node.labels`.
556+
557+
* :class:`.Relationship` objects under any variable ``<r>``
558+
will be expanded into columns (the recursion stops here)
559+
560+
* ``<r>->.prop.<property_name>`` (any) for each property of the
561+
relationship.
562+
* ``<r>->.element_id`` (str) the relationship's element id.
563+
See :attr:`.Relationship.element_id`.
564+
* ``<r>->.start.element_id`` (str) the relationship's
565+
start node's element id.
566+
See :attr:`.Relationship.start_node`.
567+
* ``<r>->.end.element_id`` (str) the relationship's
568+
end node's element id.
569+
See :attr:`.Relationship.end_node`.
570+
* ``<r>->.type`` (str) the relationship's type.
571+
See :attr:`.Relationship.type`.
572+
573+
* :const:`list` objects under any variable ``<l>`` will be expanded
574+
into
575+
576+
* ``<l>[].0`` (any) the 1st list element
577+
* ``<l>[].1`` (any) the 2nd list element
578+
* ...
579+
580+
* :const:`dict` objects under any variable ``<d>`` will be expanded
581+
into
582+
583+
* ``<d>{}.<key1>`` (any) the 1st key of the dict
584+
* ``<d>{}.<key2>`` (any) the 2nd key of the dict
585+
* ...
586+
587+
* :const:`list` and :const:`dict` objects are expanded recursively.
588+
Example::
589+
590+
variable x: [{"foo": "bar", "baz": [42, 0]}, "foobar"]
591+
592+
will be expanded to::
593+
594+
{
595+
"x[].0{}.foo": "bar",
596+
"x[].0{}.baz[].0": 42,
597+
"x[].0{}.baz[].1": 0,
598+
"x[].1": "foobar"
599+
}
600+
601+
* Everything else (including :class:`.Path` objects) will not
602+
be flattened.
603+
604+
:const:`dict` keys and variable names that contain ``.`` or ``\``
605+
will be escaped with a backslash (``\.`` and ``\\`` respectively).
606+
:type expand: bool
535607
536608
:rtype: :py:class:`pandas.DataFrame`
537609
:raises ImportError: if `pandas` library is not available.
@@ -545,7 +617,34 @@ async def to_df(self):
545617
"""
546618
import pandas as pd
547619

548-
return pd.DataFrame(await self.values(), columns=self._keys)
620+
if not expand:
621+
return pd.DataFrame(await self.values(), columns=self._keys)
622+
else:
623+
df_keys = None
624+
rows = []
625+
async for record in self:
626+
row = RecordTableRowExporter().transform(dict(record.items()))
627+
if df_keys == row.keys():
628+
rows.append(row.values())
629+
elif df_keys is None:
630+
df_keys = row.keys()
631+
rows.append(row.values())
632+
elif df_keys is False:
633+
rows.append(row)
634+
else:
635+
# The rows have different keys. We need to pass a list
636+
# of dicts to pandas
637+
rows = [{k: v for k, v in zip(df_keys, r)} for r in rows]
638+
df_keys = False
639+
rows.append(row)
640+
if df_keys is False:
641+
return pd.DataFrame(rows)
642+
else:
643+
columns = df_keys or [
644+
k.replace(".", "\\.").replace("\\", "\\\\")
645+
for k in self._keys
646+
]
647+
return pd.DataFrame(rows, columns=columns)
549648

550649
def closed(self):
551650
"""Return True if the result has been closed.

neo4j/_sync/work/result.py

Lines changed: 106 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
from warnings import warn
2121

2222
from ..._async_compat.util import Util
23-
from ...data import DataDehydrator
23+
from ...data import (
24+
DataDehydrator,
25+
RecordTableRowExporter,
26+
)
2427
from ...exceptions import (
2528
ResultConsumedError,
2629
ResultNotSingleError,
@@ -524,14 +527,83 @@ def data(self, *keys):
524527

525528
@experimental("pandas support is experimental and might be changed or "
526529
"removed in future versions")
527-
def to_df(self):
528-
"""Convert (the rest of) the result to a pandas DataFrame.
530+
def to_df(self, expand=False):
531+
r"""Convert (the rest of) the result to a pandas DataFrame.
529532
530533
This method is only available if the `pandas` library is installed.
531534
532-
``tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m").to_df()``, for
533-
instance will return a DataFrame with two columns: ``n`` and ``m`` and
534-
10 rows.
535+
::
536+
537+
res = tx.run("UNWIND range(1, 10) AS n RETURN n, n+1 as m")
538+
df = res.to_df()
539+
540+
for instance will return a DataFrame with two columns: ``n`` and ``m``
541+
and 10 rows.
542+
543+
:param expand: if :const:`True`, some structures in the result will be
544+
recursively expanded (flattened out into multiple columns) like so
545+
(everything inside ``<...>`` is a placeholder):
546+
547+
* :class:`.Node` objects under any variable ``<n>`` will be
548+
expanded into columns (the recursion stops here)
549+
550+
* ``<n>().prop.<property_name>`` (any) for each property of the
551+
node.
552+
* ``<n>().element_id`` (str) the node's element id.
553+
See :attr:`.Node.element_id`.
554+
* ``<n>().labels`` (frozenset of str) the node's labels.
555+
See :attr:`.Node.labels`.
556+
557+
* :class:`.Relationship` objects under any variable ``<r>``
558+
will be expanded into columns (the recursion stops here)
559+
560+
* ``<r>->.prop.<property_name>`` (any) for each property of the
561+
relationship.
562+
* ``<r>->.element_id`` (str) the relationship's element id.
563+
See :attr:`.Relationship.element_id`.
564+
* ``<r>->.start.element_id`` (str) the relationship's
565+
start node's element id.
566+
See :attr:`.Relationship.start_node`.
567+
* ``<r>->.end.element_id`` (str) the relationship's
568+
end node's element id.
569+
See :attr:`.Relationship.end_node`.
570+
* ``<r>->.type`` (str) the relationship's type.
571+
See :attr:`.Relationship.type`.
572+
573+
* :const:`list` objects under any variable ``<l>`` will be expanded
574+
into
575+
576+
* ``<l>[].0`` (any) the 1st list element
577+
* ``<l>[].1`` (any) the 2nd list element
578+
* ...
579+
580+
* :const:`dict` objects under any variable ``<d>`` will be expanded
581+
into
582+
583+
* ``<d>{}.<key1>`` (any) the 1st key of the dict
584+
* ``<d>{}.<key2>`` (any) the 2nd key of the dict
585+
* ...
586+
587+
* :const:`list` and :const:`dict` objects are expanded recursively.
588+
Example::
589+
590+
variable x: [{"foo": "bar", "baz": [42, 0]}, "foobar"]
591+
592+
will be expanded to::
593+
594+
{
595+
"x[].0{}.foo": "bar",
596+
"x[].0{}.baz[].0": 42,
597+
"x[].0{}.baz[].1": 0,
598+
"x[].1": "foobar"
599+
}
600+
601+
* Everything else (including :class:`.Path` objects) will not
602+
be flattened.
603+
604+
:const:`dict` keys and variable names that contain ``.`` or ``\``
605+
will be escaped with a backslash (``\.`` and ``\\`` respectively).
606+
:type expand: bool
535607
536608
:rtype: :py:class:`pandas.DataFrame`
537609
:raises ImportError: if `pandas` library is not available.
@@ -545,7 +617,34 @@ def to_df(self):
545617
"""
546618
import pandas as pd
547619

548-
return pd.DataFrame(self.values(), columns=self._keys)
620+
if not expand:
621+
return pd.DataFrame(self.values(), columns=self._keys)
622+
else:
623+
df_keys = None
624+
rows = []
625+
for record in self:
626+
row = RecordTableRowExporter().transform(dict(record.items()))
627+
if df_keys == row.keys():
628+
rows.append(row.values())
629+
elif df_keys is None:
630+
df_keys = row.keys()
631+
rows.append(row.values())
632+
elif df_keys is False:
633+
rows.append(row)
634+
else:
635+
# The rows have different keys. We need to pass a list
636+
# of dicts to pandas
637+
rows = [{k: v for k, v in zip(df_keys, r)} for r in rows]
638+
df_keys = False
639+
rows.append(row)
640+
if df_keys is False:
641+
return pd.DataFrame(rows)
642+
else:
643+
columns = df_keys or [
644+
k.replace(".", "\\.").replace("\\", "\\\\")
645+
for k in self._keys
646+
]
647+
return pd.DataFrame(rows, columns=columns)
549648

550649
def closed(self):
551650
"""Return True if the result has been closed.

neo4j/data.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,59 @@ def transform(self, x):
297297
return x
298298

299299

300+
class RecordTableRowExporter(DataTransformer):
    r"""Transformer class used by the :meth:`.Result.to_df` method.

    Flattens a single record (a mapping of variable name to value) into a
    flat mapping of column name to value:

    * nodes become ``<n>().element_id``, ``<n>().labels`` and one
      ``<n>().prop.<name>`` column per property,
    * relationships become ``<r>->.element_id``,
      ``<r>->.start.element_id``, ``<r>->.end.element_id``, ``<r>->.type``
      and one ``<r>->.prop.<name>`` column per property,
    * sequences become ``<l>[].<index>`` columns (recursively),
    * mappings become ``<d>{}.<key>`` columns (recursively),
    * everything else (paths, strings, binary data, scalars) is kept as a
      single column.

    Variable names and mapping keys have ``\`` and ``.`` escaped as ``\\``
    and ``\.`` so that the ``.`` separators stay unambiguous.
    """

    @staticmethod
    def _escape_key(key):
        r"""Return ``key`` with ``\`` and ``.`` backslash-escaped."""
        # Backslashes have to be doubled *before* dots are escaped; the
        # other order would also double the backslash that escapes a dot.
        return key.replace("\\", "\\\\").replace(".", "\\.")

    def transform(self, x):
        """Flatten the record mapping ``x`` into a column -> value mapping.

        :param x: mapping of variable name to value (one record).
        :returns: a new mapping of the same type as ``x`` whose keys are the
            (escaped, possibly expanded) column names.
        """
        assert isinstance(x, Mapping)
        t = type(x)
        return t(
            item
            for k, v in x.items()
            for item in self._transform(
                v, prefix=self._escape_key(k)
            ).items()
        )

    def _transform(self, x, prefix):
        """Flatten a single value ``x`` into columns named after ``prefix``.

        :param x: the value to flatten.
        :param prefix: the already escaped column name prefix for ``x``.
        :returns: mapping of column name to value; may be empty for empty
            containers.
        """
        # Graph types are handled before the generic container branches
        # below; they expose ``items()`` themselves and the recursion stops
        # at them (their property values are not expanded any further).
        if isinstance(x, Node):
            res = {
                "%s().element_id" % prefix: x.element_id,
                "%s().labels" % prefix: x.labels,
            }
            res.update(("%s().prop.%s" % (prefix, k), v) for k, v in x.items())
            return res
        elif isinstance(x, Relationship):
            res = {
                "%s->.element_id" % prefix: x.element_id,
                "%s->.start.element_id" % prefix: x.start_node.element_id,
                "%s->.end.element_id" % prefix: x.end_node.element_id,
                "%s->.type" % prefix: x.__class__.__name__,
            }
            res.update(("%s->.prop.%s" % (prefix, k), v) for k, v in x.items())
            return res
        elif isinstance(x, (Path, str, bytes, bytearray)):
            # str, bytes and bytearray are all ``Sequence`` instances, but
            # they are values, not lists: keep them in a single column
            # instead of exploding them into one column per character/byte.
            return {prefix: x}
        elif isinstance(x, Sequence):
            return dict(
                item
                for i, v in enumerate(x)
                for item in self._transform(
                    v, prefix="%s[].%i" % (prefix, i)
                ).items()
            )
        elif isinstance(x, Mapping):
            t = type(x)
            return t(
                item
                for k, v in x.items()
                for item in self._transform(
                    v, prefix="%s{}.%s" % (prefix, self._escape_key(k))
                ).items()
            )
        else:
            return {prefix: x}
351+
352+
300353
class DataHydrator:
301354
# TODO: extend DataTransformer
302355

0 commit comments

Comments
 (0)