BASH Post Services

Viewing: 1759099453_irs.sh

To download this script, run `wget https://bash.commongrounds.cc/uploads/1759099453_irs.sh` from the console.

Raw File Link

#!/bin/bash
#
# Flatten unzipped IRS Form 990 XML returns into a single CSV file.
#
# Usage: run from the directory that holds the unzipped XML files. Every
# "*_public.xml" file found below the current directory becomes one row of
# all_xml_data.csv, which is overwritten on each run.
#
# Requires: bash, find, python3.

set -euo pipefail

# Output CSV file
output="all_xml_data.csv"

#######################################
# Print the Python program that turns XML files into CSV rows.
# The program takes the XML paths as command-line arguments and writes one
# fully quoted CSV row per path to stdout. The heredoc delimiter is quoted so
# the shell never expands anything inside the Python source.
# Arguments: none
# Outputs:   Python source on stdout
#######################################
extractor_source() {
  cat <<'PY'
import csv
import os
import sys
import xml.etree.ElementTree as ET

# Revenue/expense element names keyed by ReturnTypeCd. Any other return type
# (990EZ included) uses the fallback names.
FINANCIAL_TAGS = {'990': ('CYTotalRevenueAmt', 'CYTotalExpensesAmt')}
FALLBACK_TAGS = ('TotalRevenueAmt', 'TotalExpensesAmt')
NET_ASSETS_TAG = 'NetAssetsOrFundBalancesEOYAmt'
COLUMN_COUNT = 11  # must match the CSV header written by the shell wrapper


def extract_row(path):
    """Return the CSV fields for one XML return; raises on unreadable XML."""
    root = ET.parse(path).getroot()

    # A namespaced document has a root tag of the form '{uri}Name'; reuse the
    # '{uri}' prefix so lookups work with or without a default namespace.
    prefix = root.tag.split('}')[0] + '}' if root.tag.startswith('{') else ''

    def find_text(tag):
        # Stripped text of the first matching descendant, or '' when the
        # element is missing or empty. The explicit "is not None" matters:
        # an Element without children is falsy.
        # NOTE(review): the match is document-wide, so if these tags also
        # occur outside the filer section the row may mix values - confirm
        # against the IRS schema.
        elem = root.find('.//' + prefix + tag)
        return elem.text.strip() if elem is not None and elem.text else ''

    def joined(first_tag, second_tag):
        # "Line1 Line2", or just Line1 when Line2 is empty.
        first, second = find_text(first_tag), find_text(second_tag)
        return first + (' ' + second if second else '')

    rev_tag, exp_tag = FINANCIAL_TAGS.get(find_text('ReturnTypeCd'),
                                          FALLBACK_TAGS)
    return [
        os.path.basename(path),
        find_text('EIN'),
        joined('BusinessNameLine1Txt', 'BusinessNameLine2Txt'),
        joined('AddressLine1Txt', 'AddressLine2Txt'),
        find_text('CityNm'),
        find_text('StateAbbreviationCd'),
        find_text('ZIPCd'),
        find_text('TaxPeriodEndDt'),
        find_text(rev_tag),
        find_text(exp_tag),
        find_text(NET_ASSETS_TAG),
    ]


def main(paths):
    # Emit UTF-8 regardless of locale, and let undecodable file-name bytes
    # round-trip instead of aborting the whole batch.
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(encoding='utf-8', errors='surrogateescape')

    # QUOTE_ALL quotes every field and doubles embedded quotes, so commas
    # and quotes inside values are safe.
    writer = csv.writer(sys.stdout, quoting=csv.QUOTE_ALL, lineterminator='\n')
    for path in paths:
        try:
            row = extract_row(path)
        except Exception as exc:
            # Best effort: keep a placeholder row so the file is still
            # accounted for, but say why it failed.
            print(f'warning: {path}: {exc}', file=sys.stderr)
            row = [os.path.basename(path)] + [''] * (COLUMN_COUNT - 1)
        writer.writerow(row)


main(sys.argv[1:])
PY
}

#######################################
# Write the CSV header, then append one row per "*_public.xml" file found
# recursively below the current directory.
# Globals:   output (read)
# Arguments: none
# Outputs:   (over)writes "$output"; per-file warnings go to stderr
# Returns:   non-zero if find or python3 fails
#######################################
main() {
  local extractor
  extractor=$(extractor_source)

  # Write header
  printf '%s\n' \
    "filename,ein,org_name,address,city,state,zip,tax_period_end,total_revenue,total_expenses,net_assets_eoy" \
    > "$output"

  # Paths are passed as argv, never spliced into the Python source, so names
  # containing quotes or backslashes are safe. "{} +" batches many files into
  # each python3 process instead of starting one interpreter per file.
  find . -type f -name '*_public.xml' \
    -exec python3 -c "$extractor" {} + >> "$output"
}

main "$@"
BASH to Home