Skip to content

Commit 56e4312

Browse files
authored
feat: show OCR bounding box (#23717)
* feat: ocr bounding box * bounding boxes * pr feedback * pr feedback * allow copy across text boxes * pr feedback
1 parent f59417c commit 56e4312

File tree

9 files changed

+293
-5
lines changed

9 files changed

+293
-5
lines changed

i18n/en.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,6 +1158,7 @@
11581158
"hide_named_person": "Hide person {name}",
11591159
"hide_password": "Hide password",
11601160
"hide_person": "Hide person",
1161+
"hide_text_recognition": "Hide text recognition",
11611162
"hide_unnamed_people": "Hide unnamed people",
11621163
"home_page_add_to_album_conflicts": "Added {added} assets to album {album}. {failed} assets are already in the album.",
11631164
"home_page_add_to_album_err_local": "Can not add local assets to albums yet, skipping",
@@ -1967,6 +1968,7 @@
19671968
"show_slideshow_transition": "Show slideshow transition",
19681969
"show_supporter_badge": "Supporter badge",
19691970
"show_supporter_badge_description": "Show a supporter badge",
1971+
"show_text_recognition": "Show text recognition",
19701972
"show_text_search_menu": "Show text search menu",
19711973
"shuffle": "Shuffle",
19721974
"sidebar": "Sidebar",
@@ -2037,6 +2039,7 @@
20372039
"tags": "Tags",
20382040
"tap_to_run_job": "Tap to run job",
20392041
"template": "Template",
2042+
"text_recognition": "Text recognition",
20402043
"theme": "Theme",
20412044
"theme_selection": "Theme selection",
20422045
"theme_selection_description": "Automatically set the theme to light or dark based on your browser's system preference",

web/src/lib/actions/zoom-image.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { photoZoomState } from '$lib/stores/zoom-image.store';
22
import { useZoomImageWheel } from '@zoom-image/svelte';
33
import { get } from 'svelte/store';
44

5-
export const zoomImageAction = (node: HTMLElement) => {
5+
export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolean }) => {
66
const { createZoomImage, zoomImageState, setZoomImageState } = useZoomImageWheel();
77

88
createZoomImage(node, {
@@ -14,9 +14,32 @@ export const zoomImageAction = (node: HTMLElement) => {
1414
setZoomImageState(state);
1515
}
1616

17+
// Intercept wheel/pointerdown events and stop their propagation while the action is disabled
18+
const wheelHandler = (event: WheelEvent) => {
19+
if (options?.disabled) {
20+
event.stopImmediatePropagation();
21+
}
22+
};
23+
24+
const pointerDownHandler = (event: PointerEvent) => {
25+
if (options?.disabled) {
26+
event.stopImmediatePropagation();
27+
}
28+
};
29+
30+
// Add handlers at capture phase with higher priority
31+
node.addEventListener('wheel', wheelHandler, { capture: true });
32+
node.addEventListener('pointerdown', pointerDownHandler, { capture: true });
33+
1734
const unsubscribes = [photoZoomState.subscribe(setZoomImageState), zoomImageState.subscribe(photoZoomState.set)];
35+
1836
return {
37+
update(newOptions?: { disabled?: boolean }) {
38+
options = newOptions;
39+
},
1940
destroy() {
41+
node.removeEventListener('wheel', wheelHandler, { capture: true });
42+
node.removeEventListener('pointerdown', pointerDownHandler, { capture: true });
2043
for (const unsubscribe of unsubscribes) {
2144
unsubscribe();
2245
}

web/src/lib/components/asset-viewer/asset-viewer.svelte

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
1414
import { closeEditorCofirm } from '$lib/stores/asset-editor.store';
1515
import { assetViewingStore } from '$lib/stores/asset-viewing.store';
16+
import { ocrManager } from '$lib/stores/ocr.svelte';
1617
import { alwaysLoadOriginalVideo, isShowDetail } from '$lib/stores/preferences.store';
1718
import { SlideshowNavigation, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store';
1819
import { user } from '$lib/stores/user.store';
@@ -44,6 +45,7 @@
4445
import CropArea from './editor/crop-tool/crop-area.svelte';
4546
import EditorPanel from './editor/editor-panel.svelte';
4647
import ImagePanoramaViewer from './image-panorama-viewer.svelte';
48+
import OcrButton from './ocr-button.svelte';
4749
import PhotoViewer from './photo-viewer.svelte';
4850
import SlideshowBar from './slideshow-bar.svelte';
4951
import VideoViewer from './video-wrapper-viewer.svelte';
@@ -392,9 +394,13 @@
392394
handlePromiseError(activityManager.init(album.id, asset.id));
393395
}
394396
});
397+
398+
let currentAssetId = $derived(asset.id);
395399
$effect(() => {
396-
if (asset.id) {
397-
handlePromiseError(handleGetAllAlbums());
400+
if (currentAssetId) {
401+
untrack(() => handlePromiseError(handleGetAllAlbums()));
402+
ocrManager.clear();
403+
handlePromiseError(ocrManager.getAssetOcr(currentAssetId));
398404
}
399405
});
400406
</script>
@@ -535,6 +541,7 @@
535541
{playOriginalVideo}
536542
/>
537543
{/if}
544+
538545
{#if $slideshowState === SlideshowState.None && isShared && ((album && album.isActivityEnabled) || activityManager.commentCount > 0) && !activityManager.isLoading}
539546
<div class="absolute bottom-0 end-0 mb-20 me-8">
540547
<ActivityStatus
@@ -547,6 +554,12 @@
547554
/>
548555
</div>
549556
{/if}
557+
558+
{#if $slideshowState === SlideshowState.None && asset.type === AssetTypeEnum.Image && !isShowEditor && ocrManager.hasOcrData}
559+
<div class="absolute bottom-0 end-0 mb-6 me-6">
560+
<OcrButton />
561+
</div>
562+
{/if}
550563
{/key}
551564
{/if}
552565
</div>

web/src/lib/components/asset-viewer/detail-panel.svelte

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@
503503
{/if}
504504

505505
{#if albums.length > 0}
506-
<section class="px-6 pt-6 dark:text-immich-dark-fg">
506+
<section class="px-6 py-6 dark:text-immich-dark-fg">
507507
<p class="uppercase pb-4 text-sm">{$t('appears_in')}</p>
508508
{#each albums as album (album.id)}
509509
<a href={resolve(`${AppRoute.ALBUMS}/${album.id}`)}>
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<script lang="ts">
  import type { OcrBox } from '$lib/utils/ocr-utils';
  import { calculateBoundingBoxDimensions } from '$lib/utils/ocr-utils';

  type Props = {
    ocrBox: OcrBox;
  };

  let { ocrBox }: Props = $props();

  // Geometry computed from the box's points; recomputed whenever `ocrBox` changes.
  const dimensions = $derived(calculateBoundingBoxDimensions(ocrBox.points));

  // Both layers (outline and text) share exactly the same size and transform,
  // so the inline style string is assembled once and reused.
  const boxStyle = $derived.by(() => {
    const { width, height, minX, minY, centerX, centerY, rotation, skewX, skewY } = dimensions;
    const transform = `translate(${minX}px, ${minY}px) rotate(${rotation}deg) skew(${skewX}deg, ${skewY}deg)`;
    // The origin is expressed relative to the element's own top-left corner.
    const transformOrigin = `${centerX - minX}px ${centerY - minY}px`;
    return `width: ${width}px; height: ${height}px; transform: ${transform}; transform-origin: ${transformOrigin};`;
  });
</script>

<div class="absolute group left-0 top-0 pointer-events-none">
  <!-- Outline layer: the visible bounding box, highlighted while the group is hovered -->
  <div
    class="absolute border-2 border-blue-500 bg-blue-500/10 cursor-pointer pointer-events-auto transition-all group-hover:bg-blue-500/30 group-hover:border-blue-600 group-hover:border-[3px]"
    style={boxStyle}
  ></div>

  <!-- Text layer: always rendered with transparent text so it can be selected and copied; revealed on hover -->
  <div
    class="absolute flex items-center justify-center text-transparent text-sm px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text group-hover:text-white group-hover:bg-black/75 group-hover:z-10"
    style={boxStyle}
  >
    {ocrBox.text}
  </div>
</div>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<script lang="ts">
  import { ocrManager } from '$lib/stores/ocr.svelte';
  import { IconButton } from '@immich/ui';
  import { mdiTextRecognition } from '@mdi/js';
  import { t } from 'svelte-i18n';
</script>

<!--
  Toggle button for the OCR overlay.
  The class is a real expression on `ocrManager.showOverlay`: the previous value was a
  plain string literal, so the conditional text inside it was never evaluated and the
  active highlight was never applied.
-->
<IconButton
  title={ocrManager.showOverlay ? $t('hide_text_recognition') : $t('show_text_recognition')}
  icon={mdiTextRecognition}
  class={ocrManager.showOverlay ? 'dark bg-immich-primary text-white' : 'dark'}
  color="secondary"
  variant="ghost"
  shape="round"
  aria-label={$t('text_recognition')}
  onclick={() => ocrManager.toggleOcrBoundingBox()}
/>

web/src/lib/components/asset-viewer/photo-viewer.svelte

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,22 @@
22
import { shortcuts } from '$lib/actions/shortcut';
33
import { zoomImageAction } from '$lib/actions/zoom-image';
44
import FaceEditor from '$lib/components/asset-viewer/face-editor/face-editor.svelte';
5+
import OcrBoundingBox from '$lib/components/asset-viewer/ocr-bounding-box.svelte';
56
import BrokenAsset from '$lib/components/assets/broken-asset.svelte';
67
import { assetViewerFadeDuration } from '$lib/constants';
78
import { castManager } from '$lib/managers/cast-manager.svelte';
89
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
910
import { photoViewerImgElement } from '$lib/stores/assets-store.svelte';
1011
import { isFaceEditMode } from '$lib/stores/face-edit.svelte';
12+
import { ocrManager } from '$lib/stores/ocr.svelte';
1113
import { boundingBoxesArray } from '$lib/stores/people.store';
1214
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
1315
import { SlideshowLook, SlideshowState, slideshowLookCssMapping, slideshowStore } from '$lib/stores/slideshow.store';
1416
import { photoZoomState } from '$lib/stores/zoom-image.store';
1517
import { getAssetOriginalUrl, getAssetThumbnailUrl, handlePromiseError } from '$lib/utils';
1618
import { canCopyImageToClipboard, copyImageToClipboard, isWebCompatibleImage } from '$lib/utils/asset-utils';
1719
import { handleError } from '$lib/utils/handle-error';
20+
import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils';
1821
import { getBoundingBox } from '$lib/utils/people-utils';
1922
import { cancelImageUrl } from '$lib/utils/sw-messaging';
2023
import { getAltText } from '$lib/utils/thumbnail-util';
@@ -71,6 +74,14 @@
7174
$boundingBoxesArray = [];
7275
});
7376
77+
let ocrBoxes = $derived(
78+
ocrManager.showOverlay && $photoViewerImgElement
79+
? getOcrBoundingBoxes(ocrManager.data, $photoZoomState, $photoViewerImgElement)
80+
: [],
81+
);
82+
83+
let isOcrActive = $derived(ocrManager.showOverlay);
84+
7485
const preload = (targetSize: AssetMediaSize | 'original', preloadAssets?: TimelineAsset[]) => {
7586
for (const preloadAsset of preloadAssets || []) {
7687
if (preloadAsset.isImage) {
@@ -130,9 +141,15 @@
130141
if ($photoZoomState.currentZoom > 1) {
131142
return;
132143
}
144+
145+
if (ocrManager.showOverlay) {
146+
return;
147+
}
148+
133149
if (onNextAsset && event.detail.direction === 'left') {
134150
onNextAsset();
135151
}
152+
136153
if (onPreviousAsset && event.detail.direction === 'right') {
137154
onPreviousAsset();
138155
}
@@ -235,7 +252,7 @@
235252
</div>
236253
{:else if !imageError}
237254
<div
238-
use:zoomImageAction
255+
use:zoomImageAction={{ disabled: isOcrActive }}
239256
{...useSwipe(onSwipe)}
240257
class="h-full w-full"
241258
transition:fade={{ duration: haveFadeTransition ? assetViewerFadeDuration : 0 }}
@@ -264,6 +281,10 @@
264281
style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;"
265282
></div>
266283
{/each}
284+
285+
{#each ocrBoxes as ocrBox (ocrBox.id)}
286+
<OcrBoundingBox {ocrBox} />
287+
{/each}
267288
</div>
268289

269290
{#if isFaceEditMode.value}

web/src/lib/stores/ocr.svelte.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import { getAssetOcr } from '@immich/sdk';
2+
3+
/** Names of the eight numeric coordinate fields carried by every OCR box. */
type OcrCoordinateKey = 'x1' | 'y1' | 'x2' | 'y2' | 'x3' | 'y3' | 'x4' | 'y4';

/**
 * One recognised text region of an asset.
 *
 * NOTE(review): the four (x, y) pairs presumably describe the corners of the
 * detected quadrilateral, and `boxScore` / `textScore` presumably are detection
 * and recognition confidences — confirm against the server API.
 */
export type OcrBoundingBox = Record<OcrCoordinateKey, number> & {
  id: string;
  assetId: string;
  boxScore: number;
  textScore: number;
  text: string;
};
18+
19+
/**
 * Reactive holder for the OCR results of the asset currently shown in the viewer.
 *
 * Exposes the loaded boxes (`data`), whether any exist (`hasOcrData`) and whether
 * the overlay is toggled on (`showOverlay`).
 */
class OcrManager {
  #data = $state<OcrBoundingBox[]>([]);
  showOverlay = $state(false);
  hasOcrData = $state(false);

  // Monotonic counter identifying the most recent load/clear. Deliberately not
  // reactive: it is only used to detect responses that arrive out of date.
  #latestRequest = 0;

  /** OCR boxes of the most recently loaded asset (empty until a load completes). */
  get data() {
    return this.#data;
  }

  /**
   * Loads the OCR boxes for the asset with the given id.
   *
   * If `clear()` or a newer `getAssetOcr()` call happens while the request is in
   * flight, the response is discarded so that boxes belonging to a previous asset
   * can never overwrite the current state.
   *
   * @param id - id of the asset whose OCR data should be fetched
   */
  async getAssetOcr(id: string) {
    const request = ++this.#latestRequest;
    const data = await getAssetOcr({ id });

    if (request !== this.#latestRequest) {
      // Superseded while awaiting; the current state belongs to someone else.
      return;
    }

    this.#data = data;
    this.hasOcrData = data.length > 0;
  }

  /** Resets all state, hides the overlay and invalidates any in-flight load. */
  clear() {
    this.#latestRequest++;
    this.#data = [];
    this.showOverlay = false;
    this.hasOcrData = false;
  }

  /** Flips the visibility of the OCR overlay. */
  toggleOcrBoundingBox() {
    this.showOverlay = !this.showOverlay;
  }
}

export const ocrManager = new OcrManager();

0 commit comments

Comments
 (0)