Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions casparser/process/cas_detailed.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
TRANSACTION_RE2,
TRANSACTION_RE3,
TRANSACTION_RE4,
TRANSACTION_RE5,
VALUATION_RE,
)
from .utils import isin_search
Expand Down Expand Up @@ -128,7 +129,7 @@ def get_parsed_scheme_name(scheme) -> str:
return re.sub(r"[^a-zA-Z0-9_)]+$", "", scheme).strip()


def parse_transaction(line) -> Optional[ParsedTransaction]:
def parse_transaction(line, pending_description: Optional[str] = None) -> Optional[ParsedTransaction]:
for regex in (TRANSACTION_RE1, TRANSACTION_RE2, TRANSACTION_RE3, TRANSACTION_RE4):
if m := re.search(regex, line, re.DOTALL | re.MULTILINE | re.I):
groups = m.groups()
Expand All @@ -149,6 +150,12 @@ def parse_transaction(line) -> Optional[ParsedTransaction]:
if date is not None:
return ParsedTransaction(date, description, amount, units, nav, balance)

# Try TRANSACTION_RE5 for first transaction without description
# (description is on previous line with "Opening Unit Balance")
if pending_description and (m := re.search(TRANSACTION_RE5, line, re.DOTALL | re.MULTILINE | re.I)):
date, amount, units, nav, balance = m.groups()
return ParsedTransaction(date, pending_description, amount, units, nav, balance)


def process_detailed_text(text):
"""
Expand All @@ -163,6 +170,7 @@ def process_detailed_text(text):
current_folio = None
current_amc = None
curr_scheme_data: Optional[Scheme] = None
pending_first_txn_desc: Optional[str] = None # For first transaction whose description is on Opening Unit Balance line
lines = text.split("\u2029")
for idx, line in enumerate(lines):
# Parse schemes with long names (single line) effectively pushing
Expand Down Expand Up @@ -237,7 +245,11 @@ def process_detailed_text(text):
if m := re.search(NOMINEE_RE, line, re.I | re.DOTALL):
curr_scheme_data.nominees.extend([x.strip() for x in m.groups() if x.strip()])
if m := re.search(OPEN_UNITS_RE, line):
curr_scheme_data.open = Decimal(m.group(1).replace(",", "_"))
# group(1) is optional description before "Opening Unit Balance" (for first transaction)
# group(2) is the opening unit balance value
if m.group(1):
pending_first_txn_desc = m.group(1).strip()
curr_scheme_data.open = Decimal(m.group(2).replace(",", "_"))
curr_scheme_data.close_calculated = curr_scheme_data.open
continue
if m := re.search(CLOSE_UNITS_RE, line):
Expand All @@ -255,7 +267,7 @@ def process_detailed_text(text):
if m := re.search(DESCRIPTION_TAIL_RE, line):
description_tail = m.group(1).strip()
line = line.replace(m.group(1), "")
if parsed_txn := parse_transaction(line):
if parsed_txn := parse_transaction(line, pending_first_txn_desc):
date = date_parser.parse(parsed_txn.date).date()
desc = parsed_txn.description.strip()
if description_tail != "":
Expand All @@ -279,6 +291,8 @@ def process_detailed_text(text):
dividend_rate=dividend_rate,
)
)
# Clear pending description after first transaction is parsed
pending_first_txn_desc = None
if curr_scheme_data:
folios[current_folio].schemes.append(curr_scheme_data)
return ProcessedCASData(
Expand Down
5 changes: 4 additions & 1 deletion casparser/process/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
SCHEME_KV_RE = r"""(\w+)\s*:\s*([-\w]+)"""

REGISTRAR_RE = r"^\s*Registrar\s*:\s*(.*)\s*$"
OPEN_UNITS_RE = r"Opening\s+Unit\s+Balance.+?([\d,.]+)"
# Captures optional description before "Opening Unit Balance" (for first transaction)
OPEN_UNITS_RE = r"(?:(.+?)\t\t)?Opening\s+Unit\s+Balance.+?([\d,.]+)"
CLOSE_UNITS_RE = r"Closing\s+Unit\s+Balance.+?([\d,.]+)"
COST_RE = r"Total\s+Cost\s+Value\s*:.+?[INR\s]*([\d,.]+)"
VALUATION_RE = (
Expand All @@ -45,6 +46,8 @@
TRANSACTION_RE3 = rf"{date_re}\t\t([^0-9].*)\t\t{amt_re}\t\t{amt_re}(?:\t\t{amt_re}\t\t{amt_re})*"
# Tax transactions
TRANSACTION_RE4 = rf"{date_re}\t\t([^0-9].*)\t\t{amt_re}(?:\t\t{amt_re}\t\t{amt_re}\t\t{amt_re})*"
# First transaction without description (description is on previous line with Opening Unit Balance)
TRANSACTION_RE5 = rf"{date_re}\t\t{amt_re}\t\t{amt_re}\t\t{amt_re}\t\t{amt_re}"
DESCRIPTION_TAIL_RE = r"(\n.+?)(\t\t|$)"
DIVIDEND_RE = r"(?:div\.|dividend|idcw).+?(reinvest)*.*?@\s*Rs\.\s*([\d\.]+)(?:\s+per\s+unit)?"
SCHEME_TAIL_RE = r"(\n.+?)(?:\t\t|$)"
Expand Down