Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 35 additions & 4 deletions src/foapy/alphabet.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,51 @@ def alphabet(X) -> np.ndarray:
>>> result
Exception
"""
# ex.:
# data = ['a', 'c', 'c', 'e', 'd', 'a']
data = np.asanyarray(X)
if data.ndim > 1: # Checking for d1 array
raise Not1DArrayException(
{"message": f"Incorrect array form. Expected d1 array, exists {data.ndim}"}
)

# Sort data positions
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be clearer to write "Sort element indices" or "Get original indices of sorted data array"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Sort data positions
# Indices that would sort data array

# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
perm = data.argsort(kind="mergesort")

mask_shape = data.shape
unique_mask = np.empty(mask_shape, dtype=bool)
# Create tmp mask array to store True on positions where appears new value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Create mask array to store True on positions where new value appears for the first time in the sorted array to distinguish where subarray of one element ends and another begins"

# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
# perm[1:] = [ 5, 1, 2, 4, 3]
# perm[:-1] = [0, 5, 1, 2, 4 ]

# data[perm[1:]] = [ 'a', 'c', 'c', 'd', 'e']
# data[perm[:-1]] = [ 'a', 'a', 'c', 'c', 'd']
# data[perm[1:]] != data[perm[:-1]] = [ False, True, False, True, True]
# unique_mask = [True, False, True, False, True, True]
# a a c c d e
unique_mask = np.empty(data.shape, dtype=bool)
# First element is new
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"First element is always new"

unique_mask[:1] = True
# Set true on positions where value differs from previous
unique_mask[1:] = data[perm[1:]] != data[perm[:-1]]

# Create tmp array that will store reverse sorted mask array
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Create mask array to store True on positions of the data array where new value appears for the first time"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Create tmp array that will store reverse sorted mask array
# Create mask array to store True on positions of the data array where new value appears for the first time

# ex.:
# a a c c d e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# a a c c d e
# sorted data array a a c c d e

# unique_mask = [True, False, True, False, True, True]
# perm = [ 0, 5, 1, 2, 4, 3]
# perm[unique_mask] = [ 0, 1, 4, 3]
# result_mask = [True, True, False, True, True, False]
# a c c e d a
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have moved this one line higher and added that this is "data" array

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The difference is that the top line shows the sorted data, while the bottom line shows the data in its original order.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# a c c e d a
# original data array a c c e d a

result_mask = np.full_like(unique_mask, False)
result_mask[:1] = True
result_mask[perm[unique_mask]] = True

# Return elements that are first appears of unique values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Return array of first occurrences of elements in the data array"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Return elements that are first appears of unique values
# Return array of first occurrences of elements in the data array

# ex.:
# data = [ 'a', 'c', 'c', 'e', 'd', 'a' ]
# result_mask = [True, True, False, True, True, False]
# data[result_mask] = [ 'a', 'c', 'e', 'd' ]
return data[result_mask]
200 changes: 199 additions & 1 deletion src/foapy/intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,52 +61,250 @@ def intervals(X, bind, mod):
raise ValueError(
{"message": "Invalid mode value. Use mode.lossy,normal,cycle or redundant."}
)

# ex.:
# ar = ['a', 'c', 'c', 'e', 'd', 'a']
ar = np.asanyarray(X)

if ar.shape == (0,):
return []

if bind == binding.end:
# For binding to the end, we need to reverse the array
# ar = ['a', 'd', 'e', 'c', 'c', 'a']
ar = ar[::-1]

# Sort data positions
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Get original indices of sorted data array"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Sort data positions
# Get original indices of sorted data array

# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
perm = ar.argsort(kind="mergesort")

# Create tmp mask array to store True on positions where appears new value.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Create mask array to store True on positions where new value appears for the first time in the sorted array to distinguish where subarray of one element ends and another begins"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Create tmp mask array to store True on positions where appears new value.
# Create mask array to store True on positions where new value appears for the first time in the sorted array to distinguish where subarray of one element ends and another begins

# Create shape length +1 of source,
# because we want to use the array for all binding modes.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"to use it as both first occurrence marker and last occurrence marker depending on the shift of the data array"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# because we want to use the array for all binding modes.
# to use it as both first occurrence marker and last occurrence marker depending on the shift of the data array

# ex.:
# Create tmp mask array to store True on positions where appears new value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate with line 82

# ex.:
# a a c c d e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# a a c c d e
# sorted data array a a c c d e

# perm = [0, 5, 1, 2, 4, 3]
# perm[1:] = [ 5, 1, 2, 4, 3]
# perm[:-1] = [0, 5, 1, 2, 4 ]

# data[perm[1:]] = [ 'a', 'c', 'c', 'd', 'e' ]
# data[perm[:-1]] = [ 'a', 'a', 'c', 'c', 'd' ]
# data[perm[1:]] != data[perm[:-1]] = [ False, True, False, True, True ]
# unique_mask = [True, False, True, False, True, True, True]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is just "mask" in code

# First appears a a c c d e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# First appears a a c c d e
# First occurrence a a c c d e

# Last appears a a c c d e
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Last appears a a c c d e
# Last occurrence a a c c d e


mask_shape = ar.shape
mask = np.empty(mask_shape[0] + 1, dtype=bool)
mask[:1] = True
mask[1:-1] = ar[perm[1:]] != ar[perm[:-1]]
mask[-1:] = True # or mask[-1] = True

# Save masks first and last appears of elements
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Create masks of first and last occurrences of elements by excluding first and last elements from unique_mask accordingly"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Save masks first and last appears of elements
# Create masks of first and last occurrences of elements by excluding first and last elements from unique_mask accordingly

# ex.:
#
# unique_mask = [True, False, True, False, True, True, True]
# first_mask = [True, False, True, False, True, True ]
# a a c c d e
# last_mask = [ False, True, False, True, True, True]
# a a c c d e
first_mask = mask[:-1]
last_mask = mask[1:]

# Create tmp array to count intervals
intervals = np.empty(ar.shape, dtype=np.intp)

# Count intervals between elements.
# Intervals of first elements appears would be wrong on that stage.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Intervals of first elements appears would be wrong on that stage.
# Intervals of the first occurrence of all elements would be wrong on that stage.

# We will fix that later.
# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
# perm[1:] = [ 5, 1, 2, 4, 3]
# perm[:-1] = [ 0, 5, 1, 2, 4]
# perm[1:] - perm[:-1] = [ 5, -4, 1, 2, -1]
# intervals = [0, 5, -4, 1, 2, -1]
# ^ ^ ^ - wrong intervals
intervals[1:] = perm[1:] - perm[:-1]

# Fix first and last intervals
# For any mode except cycle delta would be 1
# For cycle mode delta would be an array

# ex.:
# len(ar) = 6
# a a c c d e
# perm = [ 0, 5, 1, 2, 4, 3]
# last_mask = [False, True, False, True, True, True]
# perm[last_mask] = [ 5, 2, 4, 3]
# len(ar) - perm[last_mask] = [ 1, 4, 2, 3]
# delta = [ 1, 4, 2, 3]
# a c d e
delta = len(ar) - perm[last_mask] if mod == mode.cycle else 1

# ex.:
# a a c c d e
# perm = [ 0, 5, 1, 2, 4, 3]
# first_mask = [True, False, True, False, True, True]
# perm[first_mask] = [ 0, 1, 4, 3]
# a c d e
# For all modes except cycle
# a a c c d e
# intervals = [ 0, 5, -4, 1, 2, -1]
# perm[first_mask] + delta = [ 1, 2, 5, 4]
# first_mask = [True, False, True, False, True, True]
# intervals = [ 1, 5, 2, 1, 5, 4]
# a a c c d e

# For cycle mode
# a a c c d e
# intervals = [ 0, 5, -4, 1, 2, -1]
# first_mask = [True, False, True, False, True, True]
# perm[first_mask] = [ 0, 1, 4, 3]
# delta = [ 1, 4, 2, 3]
# perm[first_mask] + delta = [ 1, 5, 6, 6]
# intervals = [ 1, 5, 5, 1, 6, 6]
# a a c c d e
intervals[first_mask] = perm[first_mask] + delta

# Create inverse permutation array
inverse_perm = np.empty(ar.shape, dtype=np.intp)
# ex.:
# a a c c d e
# perm = [0, 5, 1, 2, 4, 3]
# np.arange(ar.shape[0]) = [0, 1, 2, 3, 4, 5]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# a c c e d a
inverse_perm[perm] = np.arange(ar.shape[0])

# Create result array depending on mode
if mod == mode.lossy:
# For lossy mode we ignore intervals for a first appearance of the element
# ex.:
# a a c c d e
# intervals = [ 1, 5, 2, 1, 5, 4]
# first_mask = [True, False, True, False, True, True]
# intervals = [ 0, 5, 0, 1, 0, 0]
# a a c c d e
intervals[first_mask] = 0

# Permute intervals array to the original order
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Permute intervals array to the original order
# Permute intervals array to the original arrangement

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For clarity it is better to use "arrangement" here.

# ex.:
# a a c c d e
# intervals = [0, 5, 0, 1, 0, 0]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# intervals = [0, 0, 1, 0, 0, 5]
# a c c e d a
intervals = intervals[inverse_perm]

# Remove zeros from the array
# ex.:
# a c c e d a
# intervals = [0, 0, 1, 0, 0, 5]
# intervals[intervals != 0] = [ 1, 5]
# result = [ 1, 5]
# c a
result = intervals[intervals != 0]
elif mod == mode.normal:
# For normal mode we permute intervals array to the original order
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# For normal mode we permute intervals array to the original order
# For normal mode we permute intervals array to the original arrangement

# ex.:
# a a c c d e
# intervals = [1, 5, 2, 1, 5, 4]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# intervals[inverse_perm] = [1, 2, 1, 4, 5, 5]
# a c c e d a
# result = [1, 2, 1, 4, 5, 5]
result = intervals[inverse_perm]
elif mod == mode.cycle:
# For cycle mode we permute intervals array to the original order
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# For cycle mode we permute intervals array to the original order
# For cycle mode we permute intervals array to the original arrangement

# ex.:
# a a c c d e
# intervals = [1, 5, 5, 1, 6, 6]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# intervals[inverse_perm] = [1, 5, 1, 6, 6, 5]
# a c c e d a
# result = [1, 5, 1, 6, 6, 5]
result = intervals[inverse_perm]
elif mod == mode.redundant:
# For redundant mode we need to count intervals for the first and last
# appearance of an element

# ex.:
# a a c c d e
# intervals = [1, 5, 2, 1, 5, 4]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# intervals[inverse_perm] = [1, 2, 1, 4, 5, 5]
# a c c e d a
# result = [1, 2, 1, 4, 5, 5]

# Create 2-dimensional array of shape (len(ar), 2) (shown transposed in the examples below)
# Zero row is for intervals the first appearance of the element and intervals
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Zero row is for intervals the first appearance of the element and intervals
# Zero row is for the intervals of the first appearance of the element and intervals

# for intermediate appearances
# First row will store intervals for the last appearance of the element
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# First row will store intervals for the last appearance of the element
# First row will store only intervals for the last appearance of the elements

result = np.zeros(shape=ar.shape + (2,), dtype=int)

# ex.:
# a a c c d e
# intervals = [1, 5, 2, 1, 5, 4]
# result = [
# [1, 5, 2, 1, 5, 4]
# [0, 0, 0, 0, 0, 0]
# ]
result[:, 0] = intervals

# Set intervals for the last appearance of the element to the first row

# ex.:
# a a c c d e
# perm = [ 0, 5, 1, 2, 4, 3]
# last_mask = [False, True, False, True, True, True]
# perm[last_mask] = [ 5, 2, 4, 3]
# len(ar) - perm[last_mask] = [ 1, 4, 2, 3]
# result = [
# [ 1, 5, 2, 1, 5, 4]
# [ 0, 1, 0, 4, 2, 3]
# ]
result[last_mask, 1] = len(ar) - perm[last_mask]

# Permute intervals array to the original order
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Permute intervals array to the original order
# Permute intervals array to the original arrangement

# ex.:
# a a c c d e
# result = [
# [1, 5, 2, 1, 5, 4]
# [0, 1, 0, 4, 2, 3]
# ]
# inverse_perm = [0, 2, 3, 5, 4, 1]
# result[inverse_perm] = [
# [1, 2, 1, 4, 5, 5]
# [0, 0, 4, 3, 2, 1]
# ]
# a c c e d a
result = result[inverse_perm]

# Flatten result array
# ex.:
# a c c e d a
# result[inverse_perm] = [
# [1, 2, 1, 4, 5, 5]
# [0, 0, 4, 3, 2, 1]
# ]
# result.ravel() = [ 1, 0, 2, 0, 1, 4, 4, 3, 5, 2, 5, 1]
# | a | c | c | e | d | a |
result = result.ravel()

# Exclude zeros from the result
# result = [ 1, 0, 2, 0, 1, 4, 4, 3, 5, 2, 5, 1]
# | a | c | c | e | d | a |

# result[result != 0] = [ 1, 2, 1, 4, 4, 3, 5, 2, 5, 1]
# |a |c | c | e | d | a |
result = result[result != 0]

if bind == binding.end:
# For binding to the end, we need to reverse the result
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# For binding to the end, we need to reverse the result
# For binding to the end, we need to reverse the result back

result = result[::-1]

return result
Loading