Skip to content

Pictures are not included #1

@Sfre12

Description

@Sfre12

Hi Lendy007,

First of all, this is a great idea and excellent work. I’ve been searching for something like this for years.

Today, I tried to convert my 2,600 notes from Google to OneNote. Overall, the process worked well, but the images included in my Google notes were not transferred.

For that reason, I took your script and made a few small improvements. It’s now working properly, including the image transfer.

I’ve attached the updated code with my changes. Could you please review it and update your script accordingly? I’m not very familiar with GitHub yet, so I’m not confident enough to submit the changes myself.

Thank you very much for your work and support.

Best regards,
Stephan

# Script configuration: OneNote target folder, Google Takeout source folder,
# and the name of the section file that receives the imported notes.
$NotebookPath = "c:\Keep2OneNote\OneNote\"
$sourcePath   = "c:\Keep2OneNote\TakeOut\"   # <-- adjust if needed
$NotebookName = "Keep2OneNoteFinal"

# Create the OneNote COM automation object
$OneNote = New-Object -ComObject OneNote.Application

# Open/create the notebook; OpenHierarchy writes its result into $xml
$xml = ""
$OneNote.OpenHierarchy($NotebookPath, "", [ref]$xml, [Microsoft.Office.Interop.OneNote.CreateFileType]::cftNotebook)

# Path.Combine yields the same path whether or not $NotebookPath ends with a
# backslash (plain string concatenation silently required the trailing one).
$SectionPath = [System.IO.Path]::Combine($NotebookPath, $NotebookName + '.one')

# Open/create the section; $xmlSection is later passed to CreateNewPage
$xmlSection = ""
$OneNote.OpenHierarchy($SectionPath, "", [ref]$xmlSection, [Microsoft.Office.Interop.OneNote.CreateFileType]::cftSection)

# Load LINQ to XML (Add-Type replaces the obsolete Assembly.LoadWithPartialName)
Add-Type -AssemblyName System.Xml.Linq

# -----------------------------------------------------------------------
# Helper: build a OneNote <Image> XElement carrying a file as Base64 data
# -----------------------------------------------------------------------
function New-OneNoteImageElement {
    <#
    .SYNOPSIS
        Reads an image file and wraps its bytes in a OneNote Image element.
    .PARAMETER ns
        XML namespace used to qualify the created Image and Data elements.
    .PARAMETER imagePath
        Path of the image file to embed.
    .OUTPUTS
        System.Xml.Linq.XElement (Image containing one Data child with the
        Base64 text), or $null when the file is missing, has an extension that
        is not in the table below, or cannot be read. Every $null case emits a
        warning.
    #>
    param(
        [System.Xml.Linq.XNamespace]$ns,
        [string]$imagePath
    )

    # -LiteralPath: file names containing [ or ] must not be treated as
    # wildcard patterns, otherwise existing files are reported as missing.
    if ([string]::IsNullOrEmpty($imagePath) -or -not (Test-Path -LiteralPath $imagePath)) {
        Write-Warning "Bild nicht gefunden: $imagePath"
        return $null
    }

    # Single source of truth: accepted extension -> value of the "format"
    # attribute. The values used are limited to png | emf | jpg.
    # NOTE(review): the file bytes are embedded as-is (no transcoding), so for
    # gif/bmp/webp/tif the attribute says 'png' although the payload is not PNG
    # - confirm that OneNote detects the real format from the data.
    $formatByExtension = @{
        'jpg'  = 'jpg'
        'jpeg' = 'jpg'
        'png'  = 'png'
        'gif'  = 'png'
        'bmp'  = 'png'
        'webp' = 'png'
        'emf'  = 'emf'
        'tif'  = 'png'
        'tiff' = 'png'
    }

    # ToLowerInvariant: culture-sensitive lowering breaks 'GIF'/'TIF' under
    # e.g. a Turkish locale (I -> dotless i).
    $ext = [System.IO.Path]::GetExtension($imagePath).ToLowerInvariant().TrimStart('.')
    if (-not $formatByExtension.ContainsKey($ext)) {
        Write-Warning "Nicht unterstuetztes Format uebersprungen: $([System.IO.Path]::GetFileName($imagePath)) ($ext)"
        return $null
    }

    try {
        $imgBytes = [System.IO.File]::ReadAllBytes($imagePath)
        $b64      = [Convert]::ToBase64String($imgBytes)

        $imageEl = New-Object System.Xml.Linq.XElement($ns + "Image")
        $imageEl.SetAttributeValue("format", $formatByExtension[$ext])

        $dataEl = New-Object System.Xml.Linq.XElement($ns + "Data")
        $dataEl.Value = $b64

        $imageEl.Add($dataEl)
        return $imageEl

    } catch {
        # Best effort: a single unreadable image must not abort the import.
        Write-Warning "Fehler beim Einbetten von Bild '$imagePath': $_"
        return $null
    }
}

# -----------------------------------------------------------------------
# Helper: collect attachment file paths from the note's JSON metadata
# -----------------------------------------------------------------------
function Get-ImagePathsFromJson {
    <#
    .SYNOPSIS
        Returns the filePath values listed under "attachments" in the .json
        file that sits next to the given .html file (same name, .json extension).
    .PARAMETER htmlFilePath
        Path of the note's HTML file; only used to derive the JSON path.
    .OUTPUTS
        The non-empty filePath values in document order. Nothing is returned
        when the JSON file does not exist, has no attachments, or cannot be
        parsed (the parse failure is reported as a warning).
    #>
    param([string]$htmlFilePath)

    $jsonPath = [System.IO.Path]::ChangeExtension($htmlFilePath, '.json')

    # -LiteralPath: file names containing [ or ] must not be interpreted as
    # wildcard patterns, otherwise the metadata file is wrongly reported missing.
    if (-not (Test-Path -LiteralPath $jsonPath)) { return @() }

    try {
        $json = [System.IO.File]::ReadAllText($jsonPath, [System.Text.Encoding]::UTF8) | ConvertFrom-Json

        # A list avoids re-allocating an array on every append.
        $paths = New-Object 'System.Collections.Generic.List[string]'
        if ($json.attachments) {
            foreach ($att in $json.attachments) {
                if ($att.filePath) { $paths.Add([string]$att.filePath) }
            }
        }
        return $paths.ToArray()
    } catch {
        Write-Warning "JSON-Lesefehler fuer '$jsonPath': $_"
        return @()
    }
}

# -----------------------------------------------------------------------
# Main loop: import every *.html note in $sourcePath as one OneNote page.
# Per note: read the HTML, create a blank page, set title/date, copy the
# note content as an HTMLBlock, and embed images referenced either by
# <img> tags or by the note's JSON metadata.
# -----------------------------------------------------------------------
$files = Get-ChildItem $sourcePath -Filter *.html
Write-Host "Gefundene HTML-Dateien: $($files.Count)"

$counter    = 0
$imgCounter = 0
$skipCounter = 0

# Strict decoder: throws on invalid UTF-8 byte sequences. The stock
# [Text.Encoding]::UTF8 silently substitutes U+FFFD instead, which made the
# Windows-1252 fallback below unreachable for wrongly encoded files.
$strictUtf8 = New-Object System.Text.UTF8Encoding($false, $true)

foreach ($file in $files) {
    try {
        $counter++
        Write-Host "[$counter/$($files.Count)] Verarbeite: $($file.Name)"

        # .NET instead of Get-Content - avoids the encoding parameter bug in PS 5.1
        $source = $null
        try {
            $source = [System.IO.File]::ReadAllText($file.FullName, $strictUtf8)
        } catch {
            # Fallback to Windows-1252 (ANSI/Latin-1)
            try {
                $source = [System.IO.File]::ReadAllText($file.FullName, [System.Text.Encoding]::GetEncoding(1252))
            } catch {
                Write-Warning "Datei konnte nicht gelesen werden: $($file.Name) - $_"
                $skipCounter++
                continue
            }
        }

        # Create a new page in the target section
        $newpageID = ''
        $OneNote.CreateNewPage(
            $xmlSection,
            [ref]$newpageID,
            [Microsoft.Office.Interop.OneNote.NewPageStyle]::npsBlankPageWithTitle
        )

        # Fetch the page XML so it can be edited and written back
        $NewPageXML = ''
        $OneNote.GetPageContent(
            $newpageID,
            [ref]$NewPageXML,
            [Microsoft.Office.Interop.OneNote.PageInfo]::piAll
        )

        $xDoc = [System.Xml.Linq.XDocument]::Parse($NewPageXML)
        $page = $xDoc.Root
        $ns   = $page.Name.Namespace

        # QuickStyleDef (title style)
        $quickstyledef = ($xDoc.Descendants() | Where-Object { $_.Name.LocalName -eq 'QuickStyleDef' }) | Select-Object -First 1
        if ($quickstyledef) {
            $quickstyledef.SetAttributeValue('font', 'Source Sans Pro Black')
            $quickstyledef.SetAttributeValue('fontColor', '#80be6a')
        }

        # Page dates come from the file's last-write time. Read it from the
        # FileInfo we already hold: Get-Item <path> would interpret [ ] in the
        # file name as wildcards and return nothing for such notes.
        $lastModifiedDate = $file.LastWriteTimeUtc.ToString("s")
        # Use the root element explicitly; the first document node is not
        # guaranteed to be an element.
        $dateTimeAttr     = $page.Attribute('dateTime')
        $lastModifiedAttr = $page.Attribute('lastModifiedTime')
        if ($dateTimeAttr)     { $dateTimeAttr.Value     = $lastModifiedDate }
        if ($lastModifiedAttr) { $lastModifiedAttr.Value = $lastModifiedDate }

        # Title (first <T> element); defaults to the file name, may be
        # overwritten below by the note's own title element
        $title = ($xDoc.Descendants() | Where-Object { $_.Name.LocalName -eq 'T' }) | Select-Object -First 1
        if (-not $title) {
            Write-Warning "Kein Titel-Element in $($file.Name), ueberspringe..."
            $skipCounter++
            continue
        }
        $title.Value = $file.BaseName

        $titleContainer = ($xDoc.Descendants() | Where-Object { $_.Name.LocalName -eq 'Title' }) | Select-Object -First 1

        $OutlineNode    = New-Object System.Xml.Linq.XElement($ns + "Outline")
        $OEChildrenNode = New-Object System.Xml.Linq.XElement($ns + "OEChildren")

        # File names of images already embedded (guards against double import)
        $embeddedImages = @{}

        # Parse the HTML via COM. One HTMLFile object is created per note, so
        # it is released in 'finally' instead of piling up over a long import.
        $html = New-Object -ComObject "HTMLFile"
        try {
            $html.IHTMLDocument2_write($source)

            # DOM navigation with null check
            $bodyNodes = $null
            try { $bodyNodes = $html.childNodes[1].childNodes[1].childNodes }
            catch { Write-Warning "DOM-Navigation fehlgeschlagen fuer $($file.Name)" }

            if ($bodyNodes) {
                $noteNodes = $bodyNodes | Where-Object { $_.className -like 'note*' }

                foreach ($noteNode in $noteNodes) {
                    foreach ($child in $noteNode.childNodes) {
                        switch ($child.className) {

                            'title' {
                                if ($child.innerText) { $title.Value = $child.innerText }
                            }

                            'content' {
                                # Note body is passed through as raw HTML in a CDATA section
                                if ($child.innerHTML) {
                                    $HTMLBlock = New-Object System.Xml.Linq.XElement($ns + "HTMLBlock")
                                    $HTMLData  = New-Object System.Xml.Linq.XElement($ns + "Data")
                                    $HTMLData.Add((New-Object System.Xml.Linq.XCData($child.innerHTML)))
                                    $HTMLBlock.Add($HTMLData)
                                    $OEChildrenNode.Add($HTMLBlock)
                                }
                            }

                            'attachments' {
                                # Route 1: <img> tags taken directly from the HTML
                                try {
                                    $imgTags = $child.getElementsByTagName('img')
                                    foreach ($imgTag in $imgTags) {
                                        $imgSrc = $imgTag.getAttribute('src')
                                        # An <img> without src must not abort the remaining images
                                        if (-not $imgSrc) { continue }

                                        # Only the file name is used; the image is looked up next to the HTML file
                                        $imgFileName = [System.IO.Path]::GetFileName([Uri]::UnescapeDataString($imgSrc))
                                        $imgFullPath = Join-Path $file.DirectoryName $imgFileName

                                        if ($embeddedImages.ContainsKey($imgFileName)) { continue }

                                        $imageEl = New-OneNoteImageElement -ns $ns -imagePath $imgFullPath
                                        if ($imageEl) {
                                            $OENode = New-Object System.Xml.Linq.XElement($ns + "OE")
                                            $OENode.Add($imageEl)
                                            $OEChildrenNode.Add($OENode)
                                            $embeddedImages[$imgFileName] = $true
                                            $imgCounter++
                                            Write-Host "  -> Bild eingebettet (HTML): $imgFileName"
                                        }
                                    }
                                } catch {
                                    Write-Warning "  img-Tag Verarbeitung fehlgeschlagen: $_"
                                }
                            }
                        }
                    }
                }
            }
        } finally {
            [void][System.Runtime.InteropServices.Marshal]::ReleaseComObject($html)
        }

        # Route 2: images from the JSON metadata (fallback / supplement)
        $jsonImages = Get-ImagePathsFromJson -htmlFilePath $file.FullName
        foreach ($relPath in $jsonImages) {
            $imgFileName = [System.IO.Path]::GetFileName($relPath)

            # Do not embed the same file twice
            if ($embeddedImages.ContainsKey($imgFileName)) { continue }

            # Relative paths are resolved against the folder of the HTML file
            $imgFullPath = if ([System.IO.Path]::IsPathRooted($relPath)) {
                $relPath
            } else {
                Join-Path $file.DirectoryName $relPath
            }

            $imageEl = New-OneNoteImageElement -ns $ns -imagePath $imgFullPath
            if ($imageEl) {
                $OENode = New-Object System.Xml.Linq.XElement($ns + "OE")
                $OENode.Add($imageEl)
                $OEChildrenNode.Add($OENode)
                $embeddedImages[$imgFileName] = $true
                $imgCounter++
                Write-Host "  -> Bild eingebettet (JSON): $imgFileName"
            }
        }

        # Attach the outline right after the Title element; pages without any
        # collected content (or without a Title element) get no outline
        if ($OEChildrenNode.HasElements -and $titleContainer) {
            $OutlineNode.Add($OEChildrenNode)
            $titleContainer.AddAfterSelf($OutlineNode)
        }

        $OneNote.UpdatePageContent($xDoc.ToString())

    } catch {
        # Best effort: a failing note is reported and the import continues
        Write-Warning "FEHLER bei $($file.Name): $_"
    }
}

Write-Host ""
Write-Host "Fertig! $counter Notizen verarbeitet, $imgCounter Bilder eingebettet, $skipCounter uebersprungen."

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions