# 2017-Sep-10. Created by Maxim T. (Adj 2023-Jan-01)
$scriptPath = Split-Path -parent $MyInvocation.MyCommand.Definition; 
. ($scriptPath + "\psFunctions.ps1");     # Adding script with reusable functions
. ($scriptPath + "\psSetVariables.ps1");  # Adding script to assign values to common variables

$logFile        = $scriptPath + "\Log\" + $MyInvocation.MyCommand.Name.Replace(".ps1",".txt"); 
(Get-Date).ToString("HH:mm:ss") + " --- Starting script " + $MyInvocation.MyCommand.Name | Out-File $logFile -Encoding OEM; # starting logging to file.
$logSummary = (Get-Date).ToString("HH:mm:ss") + " Script: Yahoo Historical".PadRight(28);
$quoteIDFile = $dataRootFolder + "\QuotesIntraDay\YahooIntraday.txt"; if (Test-Path $quoteIDFile) { Remove-Item $quoteIDFile;} # Removing intraday file before each load

$listStart = $config.IndexOf("<Yahoo>"); $listEnd = $config.IndexOf("</Yahoo>"); 
if ($listStart -eq -1 -or $listEnd -eq -1 -or $listStart+1 -ge $listEnd) {"<Yahoo> Symbol list is empty. Exiting script." | Out-File $logFile -Encoding OEM -Append; exit(1);}
$symbolList = @($config | Select-Object -Index(($listStart+1)..($listEnd-1))); #list of symbols we will work on
"Symbol count: " + $symbolList.count + ". MinDate: $minDate" | Out-File $logFile -Encoding OEM -Append;

# From 2017-05 Yahoo requires session with cookie and crumb value at the end of URL to return data.Example: https://query1.finance.yahoo.com/v7/finance/download/VYM?period1=1500137940&period2=1502816340&interval=1d&events=history&crumb=nF2PUWr9OBA
$wrCookie=""; $wr=""; $crumbStr="`"CrumbStore`":{`"crumb`":`""; $crumb = "";
$urlCookie = "https://finance.yahoo.com/quote/AAPL/history?p=AAPL"; #This URL will be used to get establish session and get crumb value
for ($i=0;$i -le 2 -and $crumb -eq ""; $i++) { # will attempt to get crumb 3 times
    $wrCookie = Invoke-WebRequest -Uri $urlCookie -SessionVariable websession; # Session with cookie now is in $websession variable. 
    $webCookie = $wrCookie.Content;   # Need to parse content and look for string $crumbStr value.
    $crumbStart = $webCookie.IndexOf($crumbStr); if ($crumbStart -eq 0 -or $crumbStart -eq -1) {"Crumb start not found" | Out-File $logFile -Encoding OEM -Append;  continue;} #Identifing crumb location in file. Get start location of the value;
    $crumb = $webCookie.Substring($crumbStart+$crumbStr.Length,100); # Get 100 characters from the crumb start
    $crumbEnd = $crumb.IndexOf("`"}"); if ($crumbEnd -eq 0 -or $crumbEnd -eq -1) { "Crumb end not found" | Out-File $logFile -Encoding OEM -Append; continue;} # Find crumb string end.
    
    $crumb = $crumb.Substring(0, $crumbEnd);   # ================ At this point we have crumb value in $crumb and web session with cookie established in $websession;
    if (-not($crumb -match "^[a-zA-Z0-9\s]+$") -and $i -ne 2) {$crumb = "";}
}

$urlBase = "https://query1.finance.yahoo.com/v7/finance/download/@Symbol@?period1=@FromDay@&period2=@ToDay@&interval=1d&events=history&crumb=@CRUMB@"
$toDay   = [string] [math]::Floor((get-date -UFormat %s)); #today in unix timestamp
$urlBase = $urlBase.Replace("@CRUMB@", $crumb).Replace("@ToDay@", $toDay);
ForEach($sLine in $symbolList) { # For each symbol
    $ret = GetSymbolInfo $sLine $quotesFolder $minDate; $symbol = $ret[0]; $nextDate = $ret[1]; $symbolMaxDate = $ret[2]; $symbolMaxDateAdj = $ret[3]; $symbolQuoteFile = $ret[4]; $lastQuote = $ret[5];
    "Symbol: $symbol. Next date: $nextDate. Quote file: $symbolQuoteFile" | Out-File $logFile -Encoding OEM -Append;
    if ($nextDate -gt $symbolMaxDate) { "  We already have data up to maximum configuration date of $symbolMaxDate. Will not request new data for this symbol." | Out-File $logFile -Encoding OEM -Append; continue; } 

    # ========== Get quotes from website - Start ==============
    $fromDay = [string] [math]::Floor((get-date $nextDate -UFormat %s));

    $url = $urlBase.Replace("@Symbol@",$symbol).Replace("@FromDay@",$fromDay);
    "  Requesting url: " + $url | Out-File $logFile -Encoding OEM -Append; $webpage = ""; $reqRows=0; 
    try {$reqCount++; $wr = Invoke-WebRequest -Uri $url -WebSession $websession; }
    catch { $reqFailed++; "  " + $symbol + " - Not Found (web err) `r`n" | Out-File $logFile -Encoding OEM -Append; continue; } # if attempt to get webpage failed go to next symbol
    if ($wr.StatusCode -ne 200) {$reqFailed++; "  " + $symbol + " - Returned status code: " + $wr.StatusCode + " `r`n" | Out-File $logFile -Encoding OEM -Append; continue;  }

    $quotesTxt = $wr.Content; # This variable now contains downloaded quotes in text format:    Date,Open,High,Low,Close,Adj Close,Volume    |   2017-07-17,78.830002,78.930000,78.760002,78.839996,78.839996,472700
    if ($quotesTxt.length -le 45) { $reqFailed++; "  " + $symbol + " - Not Found (return empty file) `r`n" | Out-File $logFile -Encoding OEM -Append; continue;} # Check if data received makes sense. If request size is less than 45 bytes (header length is 42), then there is something wrong with data received. Ignoring it, going to next symbol
    if ($quotesTxt.Contains("<html>")) {$reqFailed++; "  " + $symbol + " - Not Found (return html) `r`n" | Out-File $logFile -Encoding OEM -Append; continue;} # Result is html file, ignore such file
    $reqSucceed++; # If reached this point, that means symbol request was successful, go next to copy received data into local file
    # ========== Get quotes from website - End. Quote is in $quotesTxt ==============

    $ql = $quotesTxt.Split("`n") | ? {$_.trim() -ne ""  -and ($_.StartsWith("1") -or $_.StartsWith("2"))} | Sort-Object; # Sometimes duplicate records could come back, need to sort records and not load duplicate values
    $ql | %{$a=$_.Split(","); if($a[0] -ge $nextDate -and $a[0] -gt $lastQuote -and $a[0] -le $symbolMaxDateAdj -and $a[0] -lt $todayYMD -and $a[4] -match "^[\d\.]+$") {$a[0]+","+$a[4]+","+$symbol; $reqRows++; $lastQuote=$a[0]; } }  |
       Out-File $symbolQuoteFile -Encoding OEM -Append #taking just date,Close,Symbol
    if ($lastQuote -lt $nextDate) {"Done: $symbol. No new quotes found. `r`n" | Out-File $logFile -Encoding OEM -Append;} # No new records where loaded
    else {"Done: $symbol (from: $nextDate). Record count: $reqRows. Last quote: $lastQuote `r`n" | Out-File $logFile -Encoding OEM -Append; $reqRowsT+=$reqRows;}
}
$duration = (NEW-TIMESPAN -Start $startTime -End (Get-Date)).TotalSeconds.ToString("#,##0") + " sec.";
(Get-Date).ToString("HH:mm:ss") + " --- Finished. Quotes Requested/Succeed/Failed/Rows: $reqCount/$reqSucceed/$reqFailed/$reqRowsT. Duration: $duration`r`n" | Out-File $logFile -Encoding OEM -append;
$logSummary + ". Quotes Requested/Succeed/Failed/Rows: $reqCount/$reqSucceed/$reqFailed/$reqRowsT. Duration: $duration";
