I was using the Docker image deeppavlov/base-cpu, but after realizing that the main image has a newer version and also supports CPU, I switched to deeppavlov/deeppavlov.
Unfortunately, the issue still exists:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/starlette/middleware/base.py", line 106, in __call__
    response = await self.dispatch_func(request, call_next)
  File "/usr/local/lib/python3.10/site-packages/deeppavlov/utils/server/metrics.py", line 51, in dispatch
    response = await call_next(request)
  File "/usr/local/lib/python3.10/site-packages/starlette/middleware/base.py", line 80, in call_next
    raise app_exc
  File "/usr/local/lib/python3.10/site-packages/starlette/middleware/base.py", line 69, in coro
    await self.app(scope, receive_or_disconnect, send_no_error)
  File "/usr/local/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 79, in __call__
    raise exc
  File "/usr/local/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 68, in __call__
    await self.app(scope, receive, sender)
  File "/usr/local/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 21, in __call__
    raise e
  File "/usr/local/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 706, in __call__
    await route.handle(scope, receive, send)
  File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 276, in handle
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 66, in app
    response = await func(request)
  File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 237, in app
    raw_response = await run_endpoint_function(
  File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 163, in run_endpoint_function
    return await dependant.call(**values)
  File "/usr/local/lib/python3.10/site-packages/deeppavlov/utils/server/server.py", line 211, in answer
    return await loop.run_in_executor(None, interact, model, item.dict())
  File "/usr/local/lib/python3.10/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/usr/local/lib/python3.10/site-packages/deeppavlov/utils/server/server.py", line 154, in interact
    prediction = model(*model_args)
  File "/usr/local/lib/python3.10/site-packages/deeppavlov/core/common/chainer.py", line 207, in __call__
    return self._compute(*args, param_names=self.in_x, pipe=self.pipe, targets=self.out_params)
  File "/usr/local/lib/python3.10/site-packages/deeppavlov/core/common/chainer.py", line 230, in _compute
    res = component.__call__(*x)
  File "/usr/local/lib/python3.10/site-packages/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py", line 568, in __call__
    raise RuntimeError(f"input sequence after bert tokenization"
RuntimeError: input sequence after bert tokenization shouldn't exceed 512 tokens.

During handling of the above exception, another exception occurred:

  + Exception Group Traceback (most recent call last):
  |   File "/usr/local/lib/python3.10/site-packages/uvicorn/protocols/http/h11_impl.py", line 404, in run_asgi
  |     result = await app(  # type: ignore[func-returns-value]
  |   File "/usr/local/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 78, in __call__
  |     return await self.app(scope, receive, send)
  |   File "/usr/local/lib/python3.10/site-packages/fastapi/applications.py", line 270, in __call__
  |     await super().__call__(scope, receive, send)
  |   File "/usr/local/lib/python3.10/site-packages/starlette/applications.py", line 124, in __call__
  |     await self.middleware_stack(scope, receive, send)
  |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/errors.py", line 184, in __call__
  |     raise exc
  |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/errors.py", line 162, in __call__
  |     await self.app(scope, receive, _send)
  |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/cors.py", line 84, in __call__
  |     await self.app(scope, receive, send)
  |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/base.py", line 104, in __call__
  |     async with anyio.create_task_group() as task_group:
  |   File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 678, in __aexit__
  |     raise BaseExceptionGroup(
  | exceptiongroup.ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)
  +-+---------------- 1 ----------------
    | Traceback (most recent call last):
    |   File "/usr/local/lib/python3.10/site-packages/anyio/streams/memory.py", line 97, in receive
    |     return self.receive_nowait()
    |   File "/usr/local/lib/python3.10/site-packages/anyio/streams/memory.py", line 92, in receive_nowait
    |     raise WouldBlock
    | anyio.WouldBlock
    |
    | During handling of the above exception, another exception occurred:
    |
    | Traceback (most recent call last):
    |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/base.py", line 77, in call_next
    |     message = await recv_stream.receive()
    |   File "/usr/local/lib/python3.10/site-packages/anyio/streams/memory.py", line 112, in receive
    |     raise EndOfStream
    | anyio.EndOfStream
    |
    | During handling of the above exception, another exception occurred:
    |
    | Traceback (most recent call last):
    |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/base.py", line 106, in __call__
    |     response = await self.dispatch_func(request, call_next)
    |   File "/usr/local/lib/python3.10/site-packages/deeppavlov/utils/server/metrics.py", line 51, in dispatch
    |     response = await call_next(request)
    |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/base.py", line 80, in call_next
    |     raise app_exc
    |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/base.py", line 69, in coro
    |     await self.app(scope, receive_or_disconnect, send_no_error)
    |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 79, in __call__
    |     raise exc
    |   File "/usr/local/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 68, in __call__
    |     await self.app(scope, receive, sender)
    |   File "/usr/local/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 21, in __call__
    |     raise e
    |   File "/usr/local/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
    |     await self.app(scope, receive, send)
    |   File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 706, in __call__
    |     await route.handle(scope, receive, send)
    |   File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 276, in handle
    |     await self.app(scope, receive, send)
    |   File "/usr/local/lib/python3.10/site-packages/starlette/routing.py", line 66, in app
    |     response = await func(request)
    |   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 237, in app
    |     raw_response = await run_endpoint_function(
    |   File "/usr/local/lib/python3.10/site-packages/fastapi/routing.py", line 163, in run_endpoint_function
    |     return await dependant.call(**values)
    |   File "/usr/local/lib/python3.10/site-packages/deeppavlov/utils/server/server.py", line 211, in answer
    |     return await loop.run_in_executor(None, interact, model, item.dict())
    |   File "/usr/local/lib/python3.10/concurrent/futures/thread.py", line 58, in run
    |     result = self.fn(*self.args, **self.kwargs)
    |   File "/usr/local/lib/python3.10/site-packages/deeppavlov/utils/server/server.py", line 154, in interact
    |     prediction = model(*model_args)
    |   File "/usr/local/lib/python3.10/site-packages/deeppavlov/core/common/chainer.py", line 207, in __call__
    |     return self._compute(*args, param_names=self.in_x, pipe=self.pipe, targets=self.out_params)
    |   File "/usr/local/lib/python3.10/site-packages/deeppavlov/core/common/chainer.py", line 230, in _compute
    |     res = component.__call__(*x)
    |   File "/usr/local/lib/python3.10/site-packages/deeppavlov/models/preprocessors/torch_transformers_preprocessor.py", line 568, in __call__
    |     raise RuntimeError(f"input sequence after bert tokenization"
    | RuntimeError: input sequence after bert tokenization shouldn't exceed 512 tokens.
    +------------------------------------
I also looked into having my application (the caller) tokenize the words and punctuation itself and send only the first 512 tokens. But it's hard to preserve the original spacing, and even when I send exactly 512 tokens, the model still exceeds the limit and crashes anyway, presumably because BERT's WordPiece tokenizer splits words into subwords, so 512 caller-side words can expand to more than 512 model-side tokens.
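To show what I tried, here is a minimal sketch of the caller-side truncation, assuming the served model uses a standard HuggingFace fast BERT tokenizer; the "bert-base-uncased" checkpoint name is just my guess, and the real config may load a different vocabulary, which is exactly why this approach is fragile:

```python
# Caller-side truncation sketch. Assumes the served model uses a standard
# HuggingFace fast tokenizer; "bert-base-uncased" is a placeholder for
# whatever vocabulary the DeepPavlov config actually loads.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def truncate_to_limit(text: str, max_tokens: int = 512) -> str:
    # Tokenize with the model's own subword vocabulary: counting whitespace
    # words undercounts, since one word can become several WordPiece tokens.
    encoding = tokenizer(
        text,
        truncation=True,
        max_length=max_tokens,
        add_special_tokens=True,       # [CLS]/[SEP] count toward the limit
        return_offsets_mapping=True,   # character offsets into `text`
    )
    # Cut the original string at the end of the last kept token; slicing by
    # character offset preserves the original spacing exactly.
    offsets = [span for span in encoding["offset_mapping"] if span != (0, 0)]
    return text[: offsets[-1][1]] if offsets else text
```

Even then, this only works if the client and the server agree on the exact tokenizer and on how many special tokens the pipeline adds, which is why a server-side option would be much more robust.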
I feel like I’m trying to reinvent the wheel.
Couldn't the API and/or the model just truncate text input past 512 tokens, either silently or when a flag/parameter is set in the request?
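For what it's worth, a config-side knob may already be close to this: the torch_transformers_preprocessor components accept a max_seq_length argument in the pipeline config. Something along these lines is what I had in mind; the config name below is only an example, and I'm not sure setting max_seq_length alone avoids the RuntimeError for every preprocessor class, so treat this as a sketch:

```python
# Sketch: patch the pipeline config before building the model, so the
# preprocessor truncates instead of raising. "ner_ontonotes_bert" is only
# an example config name; whether max_seq_length is honored depends on the
# specific preprocessor class the config uses.
from deeppavlov import build_model
from deeppavlov.core.commands.utils import parse_config

config = parse_config("ner_ontonotes_bert")
for component in config["chainer"]["pipe"]:
    if "preprocessor" in component.get("class_name", ""):
        component["max_seq_length"] = 512
model = build_model(config, download=True)
```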
Thank you very much!